diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e43b0f9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.DS_Store diff --git a/README.md b/README.md index 2ecfc01..c6fc12b 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ With this, the icecream daemon is launched when macOS starts and relaunched if it stops. You now need to set up some environment variables. In your .bashrc, add: ```bash -$ export ICECC_VERSION=Darwin17_x86_64:/path/to/chromium/src/icecream/clang_darwin_on_darwin.tar.gz,x86_64:/path/to/chromium/src/icecream/clang_darwin_on_linux.tar.gz +$ export ICECC_VERSION=Darwin18_x86_64:/path/to/chromium/src/icecream/clang_darwin_on_darwin.tar.gz,x86_64:/path/to/chromium/src/icecream/clang_darwin_on_linux.tar.gz $ export ICECC_CLANG_REMOTE_CPP=1 $ export PATH=path/to/chromium/src/icecream/icecc/:$PATH ``` diff --git a/bin/clang b/bin/clang index 44854f6..bc734aa 120000 --- a/bin/clang +++ b/bin/clang @@ -1 +1 @@ -clang-7 \ No newline at end of file +clang-8 \ No newline at end of file diff --git a/bin/clang++ b/bin/clang++ index 44854f6..bc734aa 120000 --- a/bin/clang++ +++ b/bin/clang++ @@ -1 +1 @@ -clang-7 \ No newline at end of file +clang-8 \ No newline at end of file diff --git a/bin/clang-7 b/bin/clang-8 similarity index 80% rename from bin/clang-7 rename to bin/clang-8 index c8d9269..5e0602b 100755 Binary files a/bin/clang-7 and b/bin/clang-8 differ diff --git a/bin/clang-cl b/bin/clang-cl index 44854f6..bc734aa 120000 --- a/bin/clang-cl +++ b/bin/clang-cl @@ -1 +1 @@ -clang-7 \ No newline at end of file +clang-8 \ No newline at end of file diff --git a/bin/clang-cpp b/bin/clang-cpp new file mode 120000 index 0000000..bc734aa --- /dev/null +++ b/bin/clang-cpp @@ -0,0 +1 @@ +clang-8 \ No newline at end of file diff --git a/bin/llvm-symbolizer b/bin/llvm-symbolizer index 9c83e94..ea40c90 100755 Binary files a/bin/llvm-symbolizer and b/bin/llvm-symbolizer differ diff --git a/bin/sancov b/bin/sancov index 4ec38c4..6bfa930 100755 Binary files a/bin/sancov and b/bin/sancov differ diff --git a/clang_darwin_on_darwin.tar.gz b/clang_darwin_on_darwin.tar.gz index cd031f1..067bf3f 100644 Binary files a/clang_darwin_on_darwin.tar.gz and b/clang_darwin_on_darwin.tar.gz differ diff --git a/clang_darwin_on_linux.tar.gz b/clang_darwin_on_linux.tar.gz index fb0de07..3a59069 100644 Binary files a/clang_darwin_on_linux.tar.gz and b/clang_darwin_on_linux.tar.gz differ diff --git a/include/c++/v1/__bit_reference b/include/c++/v1/__bit_reference index 3e4a21d..c208af2 100644 --- a/include/c++/v1/__bit_reference +++ b/include/c++/v1/__bit_reference @@ -12,6 +12,7 @@ #define _LIBCPP___BIT_REFERENCE #include <__config> +#include <bit> #include <algorithm> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -254,18 +255,18 @@ __count_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = _VSTD::__pop_count(*__first.__seg_ & __m); + __r = _VSTD::__popcount(*__first.__seg_ & __m); __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += _VSTD::__pop_count(*__first.__seg_); + __r += _VSTD::__popcount(*__first.__seg_); // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += 
_VSTD::__pop_count(*__first.__seg_ & __m); + __r += _VSTD::__popcount(*__first.__seg_ & __m); } return __r; } @@ -285,18 +286,18 @@ __count_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_typ __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = _VSTD::__pop_count(~*__first.__seg_ & __m); + __r = _VSTD::__popcount(~*__first.__seg_ & __m); __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += _VSTD::__pop_count(~*__first.__seg_); + __r += _VSTD::__popcount(~*__first.__seg_); // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += _VSTD::__pop_count(~*__first.__seg_ & __m); + __r += _VSTD::__popcount(~*__first.__seg_ & __m); } return __r; } diff --git a/include/c++/v1/__bsd_locale_fallbacks.h b/include/c++/v1/__bsd_locale_fallbacks.h index 5e9e094..3097b01 100644 --- a/include/c++/v1/__bsd_locale_fallbacks.h +++ b/include/c++/v1/__bsd_locale_fallbacks.h @@ -24,28 +24,28 @@ _LIBCPP_BEGIN_NAMESPACE_STD -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY decltype(MB_CUR_MAX) __libcpp_mb_cur_max_l(locale_t __l) { __libcpp_locale_guard __current(__l); return MB_CUR_MAX; } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY wint_t __libcpp_btowc_l(int __c, locale_t __l) { __libcpp_locale_guard __current(__l); return btowc(__c); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY int __libcpp_wctob_l(wint_t __c, locale_t __l) { __libcpp_locale_guard __current(__l); return wctob(__c); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY size_t __libcpp_wcsnrtombs_l(char *__dest, const wchar_t **__src, size_t __nwc, size_t __len, mbstate_t *__ps, locale_t __l) { @@ -53,14 +53,14 @@ size_t __libcpp_wcsnrtombs_l(char *__dest, const wchar_t **__src, size_t __nwc, return wcsnrtombs(__dest, __src, __nwc, __len, __ps); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY size_t __libcpp_wcrtomb_l(char *__s, wchar_t __wc, mbstate_t *__ps, locale_t __l) { __libcpp_locale_guard __current(__l); return wcrtomb(__s, __wc, __ps); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY size_t __libcpp_mbsnrtowcs_l(wchar_t * __dest, const char **__src, size_t __nms, size_t __len, mbstate_t *__ps, locale_t __l) { @@ -68,7 +68,7 @@ size_t __libcpp_mbsnrtowcs_l(wchar_t * __dest, const char **__src, size_t __nms, return mbsnrtowcs(__dest, __src, __nms, __len, __ps); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY size_t __libcpp_mbrtowc_l(wchar_t *__pwc, const char *__s, size_t __n, mbstate_t *__ps, locale_t __l) { @@ -76,28 +76,28 @@ size_t __libcpp_mbrtowc_l(wchar_t *__pwc, const char *__s, size_t __n, return mbrtowc(__pwc, __s, __n, __ps); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY int __libcpp_mbtowc_l(wchar_t *__pwc, const char *__pmb, size_t __max, locale_t __l) { __libcpp_locale_guard __current(__l); return mbtowc(__pwc, __pmb, __max); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY size_t __libcpp_mbrlen_l(const char *__s, size_t __n, mbstate_t *__ps, locale_t __l) { __libcpp_locale_guard __current(__l); return mbrlen(__s, __n, __ps); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY lconv 
*__libcpp_localeconv_l(locale_t __l) { __libcpp_locale_guard __current(__l); return localeconv(); } -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY size_t __libcpp_mbsrtowcs_l(wchar_t *__dest, const char **__src, size_t __len, mbstate_t *__ps, locale_t __l) { diff --git a/include/c++/v1/__config b/include/c++/v1/__config index 8e42abe..bababbc 100644 --- a/include/c++/v1/__config +++ b/include/c++/v1/__config @@ -33,15 +33,23 @@ # define _GNUC_VER_NEW 0 #endif -#define _LIBCPP_VERSION 7000 +#define _LIBCPP_VERSION 8000 #ifndef _LIBCPP_ABI_VERSION -# ifdef __Fuchsia__ -# define _LIBCPP_ABI_VERSION 2 +# define _LIBCPP_ABI_VERSION 1 +#endif + +#ifndef _LIBCPP_STD_VER +# if __cplusplus <= 201103L +# define _LIBCPP_STD_VER 11 +# elif __cplusplus <= 201402L +# define _LIBCPP_STD_VER 14 +# elif __cplusplus <= 201703L +# define _LIBCPP_STD_VER 17 # else -# define _LIBCPP_ABI_VERSION 1 +# define _LIBCPP_STD_VER 18 // current year, or date of c++2a ratification # endif -#endif +#endif // _LIBCPP_STD_VER #if defined(__ELF__) # define _LIBCPP_OBJECT_FORMAT_ELF 1 @@ -149,10 +157,8 @@ #define __has_keyword(__x) !(__is_identifier(__x)) -#ifdef __has_include -# define __libcpp_has_include(__x) __has_include(__x) -#else -# define __libcpp_has_include(__x) 0 +#ifndef __has_include +#define __has_include(...) 0 #endif #if defined(__clang__) @@ -318,6 +324,31 @@ # define _LIBCPP_NO_CFI #endif +#if __ISO_C_VISIBLE >= 2011 || __cplusplus >= 201103L +# if defined(__FreeBSD__) +# define _LIBCPP_HAS_QUICK_EXIT +# define _LIBCPP_HAS_C11_FEATURES +# elif defined(__Fuchsia__) +# define _LIBCPP_HAS_QUICK_EXIT +# define _LIBCPP_HAS_TIMESPEC_GET +# define _LIBCPP_HAS_C11_FEATURES +# elif defined(__linux__) +# if !defined(_LIBCPP_HAS_MUSL_LIBC) +# if _LIBCPP_GLIBC_PREREQ(2, 15) || defined(__BIONIC__) +# define _LIBCPP_HAS_QUICK_EXIT +# endif +# if _LIBCPP_GLIBC_PREREQ(2, 17) +# define _LIBCPP_HAS_C11_FEATURES +# define _LIBCPP_HAS_TIMESPEC_GET +# endif +# else // defined(_LIBCPP_HAS_MUSL_LIBC) +# define _LIBCPP_HAS_QUICK_EXIT +# define _LIBCPP_HAS_TIMESPEC_GET +# define _LIBCPP_HAS_C11_FEATURES +# endif +# endif // __linux__ +#endif + #if defined(_LIBCPP_COMPILER_CLANG) // _LIBCPP_ALTERNATE_STRING_LAYOUT is an old name for @@ -420,28 +451,6 @@ typedef __char32_t char32_t; #define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES #endif -#if __ISO_C_VISIBLE >= 2011 || __cplusplus >= 201103L -# if defined(__FreeBSD__) -# define _LIBCPP_HAS_QUICK_EXIT -# define _LIBCPP_HAS_C11_FEATURES -# elif defined(__Fuchsia__) -# define _LIBCPP_HAS_QUICK_EXIT -# define _LIBCPP_HAS_C11_FEATURES -# elif defined(__linux__) -# if !defined(_LIBCPP_HAS_MUSL_LIBC) -# if _LIBCPP_GLIBC_PREREQ(2, 15) || defined(__BIONIC__) -# define _LIBCPP_HAS_QUICK_EXIT -# endif -# if _LIBCPP_GLIBC_PREREQ(2, 17) -# define _LIBCPP_HAS_C11_FEATURES -# endif -# else // defined(_LIBCPP_HAS_MUSL_LIBC) -# define _LIBCPP_HAS_QUICK_EXIT -# define _LIBCPP_HAS_C11_FEATURES -# endif -# endif // __linux__ -#endif - #if !(__has_feature(cxx_noexcept)) #define _LIBCPP_HAS_NO_NOEXCEPT #endif @@ -481,6 +490,8 @@ namespace std { #define _LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS #endif +#define _LIBCPP_ALWAYS_INLINE __attribute__ ((__always_inline__)) + #elif defined(_LIBCPP_COMPILER_GCC) #define _ALIGNAS(x) __attribute__((__aligned__(x))) @@ -498,7 +509,7 @@ namespace std { #define _LIBCPP_HAS_IS_BASE_OF #endif -#if !__EXCEPTIONS +#if !__EXCEPTIONS && !defined(_LIBCPP_NO_EXCEPTIONS) #define _LIBCPP_NO_EXCEPTIONS #endif @@ -572,6 +583,8 @@ namespace std { #define 
_LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS #endif +#define _LIBCPP_ALWAYS_INLINE __attribute__ ((__always_inline__)) + #elif defined(_LIBCPP_COMPILER_MSVC) #define _LIBCPP_TOSTRING2(x) #x @@ -586,9 +599,6 @@ namespace std { #define _LIBCPP_HAS_NO_CONSTEXPR #define _LIBCPP_HAS_NO_CXX14_CONSTEXPR #define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES -#if _MSC_VER <= 1800 -#define _LIBCPP_HAS_NO_UNICODE_CHARS -#endif #define _LIBCPP_HAS_NO_NOEXCEPT #define __alignof__ __alignof #define _LIBCPP_NORETURN __declspec(noreturn) @@ -607,6 +617,10 @@ namespace std { #define _LIBCPP_HAS_NO_ASAN +#define _LIBCPP_ALWAYS_INLINE __forceinline + +#define _LIBCPP_HAS_NO_VECTOR_EXTENSION + #elif defined(_LIBCPP_COMPILER_IBM) #define _ALIGNAS(x) __attribute__((__aligned__(x))) @@ -637,8 +651,26 @@ namespace std { #define _LIBCPP_HAS_NO_ASAN +#define _LIBCPP_ALWAYS_INLINE __attribute__ ((__always_inline__)) + +#define _LIBCPP_HAS_NO_VECTOR_EXTENSION + #endif // _LIBCPP_COMPILER_[CLANG|GCC|MSVC|IBM] +#if _LIBCPP_STD_VER >= 17 +#define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \ + _LIBCPP_BEGIN_NAMESPACE_STD inline namespace __fs { namespace filesystem { +#else +#define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \ + _LIBCPP_BEGIN_NAMESPACE_STD namespace __fs { namespace filesystem { +#endif + +#define _LIBCPP_END_NAMESPACE_FILESYSTEM \ + _LIBCPP_END_NAMESPACE_STD } } + +#define _VSTD_FS _VSTD::__fs::filesystem + + #if defined(_LIBCPP_OBJECT_FORMAT_COFF) #ifdef _DLL @@ -673,16 +705,6 @@ namespace std { #define _LIBCPP_TEMPLATE_VIS #define _LIBCPP_ENUM_VIS -#if defined(_LIBCPP_COMPILER_MSVC) -# define _LIBCPP_INLINE_VISIBILITY __forceinline -# define _LIBCPP_ALWAYS_INLINE __forceinline -# define _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY __forceinline -#else -# define _LIBCPP_INLINE_VISIBILITY __attribute__ ((__always_inline__)) -# define _LIBCPP_ALWAYS_INLINE __attribute__ ((__always_inline__)) -# define _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY __attribute__ ((__always_inline__)) -#endif - #endif // defined(_LIBCPP_OBJECT_FORMAT_COFF) #ifndef _LIBCPP_HIDDEN @@ -766,30 +788,42 @@ namespace std { #define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS #endif -#ifndef _LIBCPP_INLINE_VISIBILITY -# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCPP_INLINE_VISIBILITY __attribute__ ((__visibility__("hidden"), __always_inline__)) +#if __has_attribute(internal_linkage) +# define _LIBCPP_INTERNAL_LINKAGE __attribute__ ((internal_linkage)) +#else +# define _LIBCPP_INTERNAL_LINKAGE _LIBCPP_ALWAYS_INLINE +#endif + +#ifndef _LIBCPP_HIDE_FROM_ABI_PER_TU +# ifndef _LIBCPP_HIDE_FROM_ABI_PER_TU_BY_DEFAULT +# define _LIBCPP_HIDE_FROM_ABI_PER_TU 0 # else -# define _LIBCPP_INLINE_VISIBILITY __attribute__ ((__always_inline__)) +# define _LIBCPP_HIDE_FROM_ABI_PER_TU 1 # endif #endif -#ifndef _LIBCPP_ALWAYS_INLINE -# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCPP_ALWAYS_INLINE __attribute__ ((__visibility__("hidden"), __always_inline__)) +#ifndef _LIBCPP_HIDE_FROM_ABI +# if _LIBCPP_HIDE_FROM_ABI_PER_TU +# define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_INTERNAL_LINKAGE # else -# define _LIBCPP_ALWAYS_INLINE __attribute__ ((__always_inline__)) +# define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_ALWAYS_INLINE # endif #endif -#ifndef _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY -# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY __attribute__((__visibility__("default"), __always_inline__)) +#ifdef _LIBCPP_BUILDING_LIBRARY +# if _LIBCPP_ABI_VERSION > 1 +# define 
_LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI # else -# define _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY __attribute__((__always_inline__)) +# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 # endif +#else +# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI #endif + +// Just so we can migrate to the new macros gradually. +#define _LIBCPP_INLINE_VISIBILITY _LIBCPP_HIDE_FROM_ABI +#define _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY _LIBCPP_HIDE_FROM_ABI_AFTER_V1 + #ifndef _LIBCPP_PREFERRED_OVERLOAD # if __has_attribute(__enable_if__) # define _LIBCPP_PREFERRED_OVERLOAD __attribute__ ((__enable_if__(true, ""))) @@ -892,9 +926,9 @@ template <bool> struct __static_assert_check {}; # define _LIBCPP_DECLARE_STRONG_ENUM(x) struct _LIBCPP_TYPE_VIS x { enum __lx # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) \ __lx __v_; \ - _LIBCPP_ALWAYS_INLINE x(__lx __v) : __v_(__v) {} \ - _LIBCPP_ALWAYS_INLINE explicit x(int __v) : __v_(static_cast<__lx>(__v)) {} \ - _LIBCPP_ALWAYS_INLINE operator int() const {return __v_;} \ + _LIBCPP_INLINE_VISIBILITY x(__lx __v) : __v_(__v) {} \ + _LIBCPP_INLINE_VISIBILITY explicit x(int __v) : __v_(static_cast<__lx>(__v)) {} \ + _LIBCPP_INLINE_VISIBILITY operator int() const {return __v_;} \ }; #else // _LIBCPP_HAS_NO_STRONG_ENUMS # define _LIBCPP_DECLARE_STRONG_ENUM(x) enum class _LIBCPP_ENUM_VIS x @@ -947,6 +981,12 @@ template <bool> struct __static_assert_check {}; #define _DECLARE_C99_LDBL_MATH 1 #endif + +// If we are getting operator new from the MSVC CRT, then allocation overloads +// for align_val_t were added in 19.12, aka VS 2017 version 15.3. +#if defined(_LIBCPP_MSVCRT) && defined(_MSC_VER) && _MSC_VER < 1912 +#define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION +#endif + #if defined(__APPLE__) # if !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) # endif #endif // defined(__APPLE__) +#if !defined(_LIBCPP_HAS_NO_ALIGNED_ALLOCATION) && \ + !defined(_LIBCPP_BUILDING_LIBRARY) && \ + (!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606) +# define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION +#endif #if defined(__APPLE__) || defined(__FreeBSD__) #define _LIBCPP_HAS_DEFAULTRUNELOCALE #define _LIBCPP_WCTYPE_IS_MASK #endif -#ifndef _LIBCPP_STD_VER -# if __cplusplus <= 201103L -# define _LIBCPP_STD_VER 11 -# elif __cplusplus <= 201402L -# define _LIBCPP_STD_VER 14 -# elif __cplusplus <= 201703L -# define _LIBCPP_STD_VER 17 +// Deprecation macros. +// Deprecation warnings are only enabled when _LIBCPP_ENABLE_DEPRECATION_WARNINGS is defined. 
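
Editorial aside: the opt-in behavior that the deprecation hunk below implements can be sketched in a few lines. This is an illustration only, with hypothetical `MY_`-prefixed names standing in for the `_LIBCPP_DEPRECATED*` macros:

```cpp
// Minimal sketch of an opt-in deprecation macro (hypothetical names).
#if defined(MY_ENABLE_DEPRECATION_WARNINGS)
#  define MY_DEPRECATED [[deprecated]]
#else
#  define MY_DEPRECATED
#endif

MY_DEPRECATED int old_api() { return 42; }

int main() {
    // Emits a deprecated-declarations warning only when built with
    // -DMY_ENABLE_DEPRECATION_WARNINGS; silent by default, mirroring
    // the opt-in scheme in the hunk that follows.
    return old_api();
}
```
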
+#if defined(_LIBCPP_ENABLE_DEPRECATION_WARNINGS) +# if __has_attribute(deprecated) +# define _LIBCPP_DEPRECATED __attribute__ ((deprecated)) +# elif _LIBCPP_STD_VER > 11 +# define _LIBCPP_DEPRECATED [[deprecated]] # else -# define _LIBCPP_STD_VER 18 // current year, or date of c++2a ratification +# define _LIBCPP_DEPRECATED # endif -#endif // _LIBCPP_STD_VER - -#if _LIBCPP_STD_VER > 11 -# define _LIBCPP_DEPRECATED [[deprecated]] #else # define _LIBCPP_DEPRECATED #endif +#if !defined(_LIBCPP_CXX03_LANG) +# define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED +#else +# define _LIBCPP_DEPRECATED_IN_CXX11 +#endif + +#if _LIBCPP_STD_VER >= 14 +# define _LIBCPP_DEPRECATED_IN_CXX14 _LIBCPP_DEPRECATED +#else +# define _LIBCPP_DEPRECATED_IN_CXX14 +#endif + +#if _LIBCPP_STD_VER >= 17 +# define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED +#else +# define _LIBCPP_DEPRECATED_IN_CXX17 +#endif + #if _LIBCPP_STD_VER <= 11 # define _LIBCPP_EXPLICIT_AFTER_CXX11 -# define _LIBCPP_DEPRECATED_AFTER_CXX11 #else # define _LIBCPP_EXPLICIT_AFTER_CXX11 explicit -# define _LIBCPP_DEPRECATED_AFTER_CXX11 [[deprecated]] #endif #if _LIBCPP_STD_VER > 11 && !defined(_LIBCPP_HAS_NO_CXX14_CONSTEXPR) @@ -1012,8 +1069,30 @@ template struct __static_assert_check {}; # define _LIBCPP_CONSTEXPR_AFTER_CXX17 #endif -#if __has_cpp_attribute(nodiscard) && _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_DISABLE_NODISCARD_AFTER_CXX17) -# define _LIBCPP_NODISCARD_AFTER_CXX17 [[nodiscard]] +// The _LIBCPP_NODISCARD_ATTRIBUTE should only be used to define other +// NODISCARD macros to the correct attribute. +#if __has_cpp_attribute(nodiscard) || defined(_LIBCPP_COMPILER_MSVC) +# define _LIBCPP_NODISCARD_ATTRIBUTE [[nodiscard]] +#elif defined(_LIBCPP_COMPILER_CLANG) && !defined(_LIBCPP_CXX03_LANG) +# define _LIBCPP_NODISCARD_ATTRIBUTE [[clang::warn_unused_result]] +#else +// We can't use GCC's [[gnu::warn_unused_result]] and +// __attribute__((warn_unused_result)), because GCC does not silence them via +// (void) cast. +# define _LIBCPP_NODISCARD_ATTRIBUTE +#endif + +// _LIBCPP_NODISCARD_EXT may be used to apply [[nodiscard]] to entities not +// specified as such as an extension. 
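
Editorial aside: `_LIBCPP_NODISCARD_EXT` lets the library mark functions `[[nodiscard]]` as a conforming extension, but only when the user opts in. A minimal sketch of the same pattern, again with hypothetical `MY_` names rather than the real libc++ macros:

```cpp
// Opt-in [[nodiscard]] extension macro (hypothetical names).
#if defined(MY_ENABLE_NODISCARD) && __has_cpp_attribute(nodiscard)
#  define MY_NODISCARD_EXT [[nodiscard]]
#else
#  define MY_NODISCARD_EXT
#endif

struct buffer {
    unsigned size_ = 0;
    MY_NODISCARD_EXT bool empty() const { return size_ == 0; }
};

int main() {
    buffer b;
    b.empty();       // warns when MY_ENABLE_NODISCARD is defined: result ignored
    (void)b.empty(); // casting to void silences the attribute
}
```
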
+#if defined(_LIBCPP_ENABLE_NODISCARD) && !defined(_LIBCPP_DISABLE_NODISCARD_EXT) +# define _LIBCPP_NODISCARD_EXT _LIBCPP_NODISCARD_ATTRIBUTE +#else +# define _LIBCPP_NODISCARD_EXT +#endif + +#if !defined(_LIBCPP_DISABLE_NODISCARD_AFTER_CXX17) && \ + (_LIBCPP_STD_VER > 17 || defined(_LIBCPP_ENABLE_NODISCARD)) +# define _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_NODISCARD_ATTRIBUTE #else # define _LIBCPP_NODISCARD_AFTER_CXX17 #endif @@ -1030,6 +1109,14 @@ template <bool> struct __static_assert_check {}; # define _LIBCPP_EXPLICIT_MOVE(x) (x) #endif +#ifndef _LIBCPP_CONSTEXPR_IF_NODEBUG +#if defined(_LIBCPP_DEBUG) || defined(_LIBCPP_HAS_NO_CXX14_CONSTEXPR) +#define _LIBCPP_CONSTEXPR_IF_NODEBUG +#else +#define _LIBCPP_CONSTEXPR_IF_NODEBUG constexpr +#endif +#endif + #ifndef _LIBCPP_HAS_NO_ASAN _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( const void *, const void *, const void *, const void *); @@ -1065,7 +1152,7 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( defined(__APPLE__) || \ defined(__CloudABI__) || \ defined(__sun__) || \ - (defined(__MINGW32__) && __libcpp_has_include(<pthread.h>)) + (defined(__MINGW32__) && __has_include(<pthread.h>)) # define _LIBCPP_HAS_THREAD_API_PTHREAD # elif defined(_LIBCPP_WIN32API) # define _LIBCPP_HAS_THREAD_API_WIN32 diff --git a/include/c++/v1/__errc b/include/c++/v1/__errc new file mode 100644 index 0000000..d0f00b7 --- /dev/null +++ b/include/c++/v1/__errc @@ -0,0 +1,218 @@ +// -*- C++ -*- +//===---------------------------- __errc ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ERRC +#define _LIBCPP___ERRC + +/* + system_error synopsis + +namespace std +{ + +enum class errc +{ + address_family_not_supported, // EAFNOSUPPORT + address_in_use, // EADDRINUSE + address_not_available, // EADDRNOTAVAIL + already_connected, // EISCONN + argument_list_too_long, // E2BIG + argument_out_of_domain, // EDOM + bad_address, // EFAULT + bad_file_descriptor, // EBADF + bad_message, // EBADMSG + broken_pipe, // EPIPE + connection_aborted, // ECONNABORTED + connection_already_in_progress, // EALREADY + connection_refused, // ECONNREFUSED + connection_reset, // ECONNRESET + cross_device_link, // EXDEV + destination_address_required, // EDESTADDRREQ + device_or_resource_busy, // EBUSY + directory_not_empty, // ENOTEMPTY + executable_format_error, // ENOEXEC + file_exists, // EEXIST + file_too_large, // EFBIG + filename_too_long, // ENAMETOOLONG + function_not_supported, // ENOSYS + host_unreachable, // EHOSTUNREACH + identifier_removed, // EIDRM + illegal_byte_sequence, // EILSEQ + inappropriate_io_control_operation, // ENOTTY + interrupted, // EINTR + invalid_argument, // EINVAL + invalid_seek, // ESPIPE + io_error, // EIO + is_a_directory, // EISDIR + message_size, // EMSGSIZE + network_down, // ENETDOWN + network_reset, // ENETRESET + network_unreachable, // ENETUNREACH + no_buffer_space, // ENOBUFS + no_child_process, // ECHILD + no_link, // ENOLINK + no_lock_available, // ENOLCK + no_message_available, // ENODATA + no_message, // ENOMSG + no_protocol_option, // ENOPROTOOPT + no_space_on_device, // ENOSPC + no_stream_resources, // ENOSR + no_such_device_or_address, // ENXIO + no_such_device, // ENODEV + no_such_file_or_directory, // ENOENT + no_such_process, // ESRCH + 
not_a_directory, // ENOTDIR + not_a_socket, // ENOTSOCK + not_a_stream, // ENOSTR + not_connected, // ENOTCONN + not_enough_memory, // ENOMEM + not_supported, // ENOTSUP + operation_canceled, // ECANCELED + operation_in_progress, // EINPROGRESS + operation_not_permitted, // EPERM + operation_not_supported, // EOPNOTSUPP + operation_would_block, // EWOULDBLOCK + owner_dead, // EOWNERDEAD + permission_denied, // EACCES + protocol_error, // EPROTO + protocol_not_supported, // EPROTONOSUPPORT + read_only_file_system, // EROFS + resource_deadlock_would_occur, // EDEADLK + resource_unavailable_try_again, // EAGAIN + result_out_of_range, // ERANGE + state_not_recoverable, // ENOTRECOVERABLE + stream_timeout, // ETIME + text_file_busy, // ETXTBSY + timed_out, // ETIMEDOUT + too_many_files_open_in_system, // ENFILE + too_many_files_open, // EMFILE + too_many_links, // EMLINK + too_many_symbolic_link_levels, // ELOOP + value_too_large, // EOVERFLOW + wrong_protocol_type // EPROTOTYPE +}; + +*/ + +#include <__config> +#include <cerrno> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// Some error codes are not present on all platforms, so we provide equivalents +// for them: + +//enum class errc +_LIBCPP_DECLARE_STRONG_ENUM(errc) +{ + address_family_not_supported = EAFNOSUPPORT, + address_in_use = EADDRINUSE, + address_not_available = EADDRNOTAVAIL, + already_connected = EISCONN, + argument_list_too_long = E2BIG, + argument_out_of_domain = EDOM, + bad_address = EFAULT, + bad_file_descriptor = EBADF, + bad_message = EBADMSG, + broken_pipe = EPIPE, + connection_aborted = ECONNABORTED, + connection_already_in_progress = EALREADY, + connection_refused = ECONNREFUSED, + connection_reset = ECONNRESET, + cross_device_link = EXDEV, + destination_address_required = EDESTADDRREQ, + device_or_resource_busy = EBUSY, + directory_not_empty = ENOTEMPTY, + executable_format_error = ENOEXEC, + file_exists = EEXIST, + file_too_large = EFBIG, + filename_too_long = ENAMETOOLONG, + function_not_supported = ENOSYS, + host_unreachable = EHOSTUNREACH, + identifier_removed = EIDRM, + illegal_byte_sequence = EILSEQ, + inappropriate_io_control_operation = ENOTTY, + interrupted = EINTR, + invalid_argument = EINVAL, + invalid_seek = ESPIPE, + io_error = EIO, + is_a_directory = EISDIR, + message_size = EMSGSIZE, + network_down = ENETDOWN, + network_reset = ENETRESET, + network_unreachable = ENETUNREACH, + no_buffer_space = ENOBUFS, + no_child_process = ECHILD, + no_link = ENOLINK, + no_lock_available = ENOLCK, +#ifdef ENODATA + no_message_available = ENODATA, +#else + no_message_available = ENOMSG, +#endif + no_message = ENOMSG, + no_protocol_option = ENOPROTOOPT, + no_space_on_device = ENOSPC, +#ifdef ENOSR + no_stream_resources = ENOSR, +#else + no_stream_resources = ENOMEM, +#endif + no_such_device_or_address = ENXIO, + no_such_device = ENODEV, + no_such_file_or_directory = ENOENT, + no_such_process = ESRCH, + not_a_directory = ENOTDIR, + not_a_socket = ENOTSOCK, +#ifdef ENOSTR + not_a_stream = ENOSTR, +#else + not_a_stream = EINVAL, +#endif + not_connected = ENOTCONN, + not_enough_memory = ENOMEM, + not_supported = ENOTSUP, + operation_canceled = ECANCELED, + operation_in_progress = EINPROGRESS, + operation_not_permitted = EPERM, + operation_not_supported = EOPNOTSUPP, + operation_would_block = EWOULDBLOCK, + owner_dead = EOWNERDEAD, + permission_denied = EACCES, + protocol_error = EPROTO, + protocol_not_supported = EPROTONOSUPPORT, + read_only_file_system = 
EROFS, + resource_deadlock_would_occur = EDEADLK, + resource_unavailable_try_again = EAGAIN, + result_out_of_range = ERANGE, + state_not_recoverable = ENOTRECOVERABLE, +#ifdef ETIME + stream_timeout = ETIME, +#else + stream_timeout = ETIMEDOUT, +#endif + text_file_busy = ETXTBSY, + timed_out = ETIMEDOUT, + too_many_files_open_in_system = ENFILE, + too_many_files_open = EMFILE, + too_many_links = EMLINK, + too_many_symbolic_link_levels = ELOOP, + value_too_large = EOVERFLOW, + wrong_protocol_type = EPROTOTYPE +}; +_LIBCPP_DECLARE_STRONG_ENUM_EPILOG(errc) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ERRC diff --git a/include/c++/v1/__functional_03 b/include/c++/v1/__functional_03 index 13d8a3d..0a3bfba 100644 --- a/include/c++/v1/__functional_03 +++ b/include/c++/v1/__functional_03 @@ -600,7 +600,10 @@ template function<_Rp()>& function<_Rp()>::operator=(const function& __f) { - function(__f).swap(*this); + if (__f) + function(__f).swap(*this); + else + *this = nullptr; return *this; } @@ -608,11 +611,12 @@ template function<_Rp()>& function<_Rp()>::operator=(nullptr_t) { - if (__f_ == (__base*)&__buf_) - __f_->destroy(); - else if (__f_) - __f_->destroy_deallocate(); + __base* __t = __f_; __f_ = 0; + if (__t == (__base*)&__buf_) + __t->destroy(); + else if (__t) + __t->destroy_deallocate(); return *this; } @@ -876,7 +880,10 @@ template function<_Rp(_A0)>& function<_Rp(_A0)>::operator=(const function& __f) { - function(__f).swap(*this); + if (__f) + function(__f).swap(*this); + else + *this = nullptr; return *this; } @@ -884,11 +891,12 @@ template function<_Rp(_A0)>& function<_Rp(_A0)>::operator=(nullptr_t) { - if (__f_ == (__base*)&__buf_) - __f_->destroy(); - else if (__f_) - __f_->destroy_deallocate(); + __base* __t = __f_; __f_ = 0; + if (__t == (__base*)&__buf_) + __t->destroy(); + else if (__t) + __t->destroy_deallocate(); return *this; } @@ -1152,7 +1160,10 @@ template function<_Rp(_A0, _A1)>& function<_Rp(_A0, _A1)>::operator=(const function& __f) { - function(__f).swap(*this); + if (__f) + function(__f).swap(*this); + else + *this = nullptr; return *this; } @@ -1160,11 +1171,12 @@ template function<_Rp(_A0, _A1)>& function<_Rp(_A0, _A1)>::operator=(nullptr_t) { - if (__f_ == (__base*)&__buf_) - __f_->destroy(); - else if (__f_) - __f_->destroy_deallocate(); + __base* __t = __f_; __f_ = 0; + if (__t == (__base*)&__buf_) + __t->destroy(); + else if (__t) + __t->destroy_deallocate(); return *this; } @@ -1428,7 +1440,10 @@ template function<_Rp(_A0, _A1, _A2)>& function<_Rp(_A0, _A1, _A2)>::operator=(const function& __f) { - function(__f).swap(*this); + if (__f) + function(__f).swap(*this); + else + *this = nullptr; return *this; } @@ -1436,11 +1451,12 @@ template function<_Rp(_A0, _A1, _A2)>& function<_Rp(_A0, _A1, _A2)>::operator=(nullptr_t) { - if (__f_ == (__base*)&__buf_) - __f_->destroy(); - else if (__f_) - __f_->destroy_deallocate(); + __base* __t = __f_; __f_ = 0; + if (__t == (__base*)&__buf_) + __t->destroy(); + else if (__t) + __t->destroy_deallocate(); return *this; } diff --git a/include/c++/v1/__functional_base b/include/c++/v1/__functional_base index 12af4dc..1887974 100644 --- a/include/c++/v1/__functional_base +++ b/include/c++/v1/__functional_base @@ -50,7 +50,7 @@ template #endif struct _LIBCPP_TEMPLATE_VIS less : binary_function<_Tp, _Tp, bool> { - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator()(const _Tp& __x, const _Tp& __y) const {return __x < __y;} }; @@ -59,7 +59,7 @@ 
struct _LIBCPP_TEMPLATE_VIS less : binary_function<_Tp, _Tp, bool> template <> struct _LIBCPP_TEMPLATE_VIS less { - template + template _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY auto operator()(_T1&& __t, _T2&& __u) const _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u))) @@ -552,7 +552,7 @@ template struct __is_transparent : false_type {}; template -struct __is_transparent<_Tp, _Up, +struct __is_transparent<_Tp, _Up, typename __void_t::type> : true_type {}; #endif @@ -561,7 +561,7 @@ struct __is_transparent<_Tp, _Up, struct _LIBCPP_TEMPLATE_VIS allocator_arg_t { }; -#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_MEMORY) +#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) extern const allocator_arg_t allocator_arg; #else /* _LIBCPP_INLINE_VAR */ constexpr allocator_arg_t allocator_arg = allocator_arg_t(); @@ -646,16 +646,6 @@ void __user_alloc_construct_impl (integral_constant, _Tp *__storage, con new (__storage) _Tp (_VSTD::forward<_Args>(__args)..., __a); } -// FIXME: Theis should have a version which takes a non-const alloc. -template -inline _LIBCPP_INLINE_VISIBILITY -void __user_alloc_construct (_Tp *__storage, const _Allocator &__a, _Args &&... __args) -{ - __user_alloc_construct_impl( - __uses_alloc_ctor<_Tp, _Allocator>(), - __storage, __a, _VSTD::forward<_Args>(__args)... - ); -} #endif // _LIBCPP_CXX03_LANG _LIBCPP_END_NAMESPACE_STD diff --git a/include/c++/v1/__hash_table b/include/c++/v1/__hash_table index 3f430af..c77de96 100644 --- a/include/c++/v1/__hash_table +++ b/include/c++/v1/__hash_table @@ -32,13 +32,8 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -#ifndef _LIBCPP_CXX03_LANG -template -union __hash_value_type; -#else template struct __hash_value_type; -#endif template ::value && !__libcpp_is_final<_Hash>::value> @@ -172,7 +167,7 @@ struct __hash_key_value_types { } #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - static __container_value_type&& __move(__node_value_type& __v) { + static __container_value_type&& __move(__node_value_type& __v) { return _VSTD::move(__v); } #endif @@ -184,7 +179,6 @@ struct __hash_key_value_types<__hash_value_type<_Key, _Tp> > { typedef _Tp mapped_type; typedef __hash_value_type<_Key, _Tp> __node_value_type; typedef pair __container_value_type; - typedef pair<_Key, _Tp> __nc_value_type; typedef __container_value_type __map_value_type; static const bool __is_map = true; @@ -198,7 +192,7 @@ struct __hash_key_value_types<__hash_value_type<_Key, _Tp> > { static typename enable_if<__is_same_uncvref<_Up, __node_value_type>::value, __container_value_type const&>::type __get_value(_Up& __t) { - return __t.__cc; + return __t.__get_value(); } template @@ -211,12 +205,12 @@ struct __hash_key_value_types<__hash_value_type<_Key, _Tp> > { _LIBCPP_INLINE_VISIBILITY static __container_value_type* __get_ptr(__node_value_type& __n) { - return _VSTD::addressof(__n.__cc); + return _VSTD::addressof(__n.__get_value()); } #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - static __nc_value_type&& __move(__node_value_type& __v) { - return _VSTD::move(__v.__nc); + static pair __move(__node_value_type& __v) { + return __v.__move(); } #endif @@ -865,6 +859,17 @@ public: template friend class __hash_map_node_destructor; }; +#if _LIBCPP_STD_VER > 14 +template +struct __generic_container_node_destructor; + +template +struct __generic_container_node_destructor<__hash_node<_Tp, _VoidPtr>, _Alloc> + : __hash_node_destructor<_Alloc> +{ + using 
__hash_node_destructor<_Alloc>::__hash_node_destructor; +}; +#endif #ifndef _LIBCPP_CXX03_LANG template @@ -1157,6 +1162,30 @@ public: return __emplace_unique_key_args(_NodeTypes::__get_key(__x), __x); } +#if _LIBCPP_STD_VER > 14 + template + _LIBCPP_INLINE_VISIBILITY + _InsertReturnType __node_handle_insert_unique(_NodeHandle&& __nh); + template + _LIBCPP_INLINE_VISIBILITY + iterator __node_handle_insert_unique(const_iterator __hint, + _NodeHandle&& __nh); + + template + _LIBCPP_INLINE_VISIBILITY + iterator __node_handle_insert_multi(_NodeHandle&& __nh); + template + _LIBCPP_INLINE_VISIBILITY + iterator __node_handle_insert_multi(const_iterator __hint, _NodeHandle&& __nh); + + template + _LIBCPP_INLINE_VISIBILITY + _NodeHandle __node_handle_extract(key_type const& __key); + template + _LIBCPP_INLINE_VISIBILITY + _NodeHandle __node_handle_extract(const_iterator __it); +#endif + void clear() _NOEXCEPT; void rehash(size_type __n); _LIBCPP_INLINE_VISIBILITY void reserve(size_type __n) @@ -2132,6 +2161,91 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_multi(const_iterator __p, #endif // _LIBCPP_CXX03_LANG +#if _LIBCPP_STD_VER > 14 +template +template +_LIBCPP_INLINE_VISIBILITY +_InsertReturnType +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_insert_unique( + _NodeHandle&& __nh) +{ + if (__nh.empty()) + return _InsertReturnType{end(), false, _NodeHandle()}; + pair __result = __node_insert_unique(__nh.__ptr_); + if (__result.second) + __nh.__release(); + return _InsertReturnType{__result.first, __result.second, _VSTD::move(__nh)}; +} + +template +template +_LIBCPP_INLINE_VISIBILITY +typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_insert_unique( + const_iterator, _NodeHandle&& __nh) +{ + if (__nh.empty()) + return end(); + pair __result = __node_insert_unique(__nh.__ptr_); + if (__result.second) + __nh.__release(); + return __result.first; +} + +template +template +_LIBCPP_INLINE_VISIBILITY +_NodeHandle +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_extract( + key_type const& __key) +{ + iterator __i = find(__key); + if (__i == end()) + return _NodeHandle(); + return __node_handle_extract<_NodeHandle>(__i); +} + +template +template +_LIBCPP_INLINE_VISIBILITY +_NodeHandle +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_extract( + const_iterator __p) +{ + allocator_type __alloc(__node_alloc()); + return _NodeHandle(remove(__p).release(), __alloc); +} + +template +template +_LIBCPP_INLINE_VISIBILITY +typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_insert_multi( + _NodeHandle&& __nh) +{ + if (__nh.empty()) + return end(); + iterator __result = __node_insert_multi(__nh.__ptr_); + __nh.__release(); + return __result; +} + +template +template +_LIBCPP_INLINE_VISIBILITY +typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_insert_multi( + const_iterator __hint, _NodeHandle&& __nh) +{ + if (__nh.empty()) + return end(); + iterator __result = __node_insert_multi(__hint, __nh.__ptr_); + __nh.__release(); + return __result; +} + +#endif // _LIBCPP_STD_VER > 14 + template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::rehash(size_type __n) diff --git a/include/c++/v1/__libcpp_version b/include/c++/v1/__libcpp_version index 2bdc653..e002b36 100644 --- a/include/c++/v1/__libcpp_version +++ b/include/c++/v1/__libcpp_version @@ -1 +1 @@ -7000 +8000 diff --git 
a/include/c++/v1/__locale b/include/c++/v1/__locale index 601f0d1..f43e7b4 100644 --- a/include/c++/v1/__locale +++ b/include/c++/v1/__locale @@ -24,11 +24,7 @@ #elif defined(_AIX) # include #elif defined(__ANDROID__) -// Android gained the locale aware functions in L (API level 21) -# include -# if __ANDROID_API__ <= 20 -# include -# endif +# include #elif defined(__sun__) # include # include @@ -473,7 +469,7 @@ public: static const mask alnum = alpha | digit; static const mask graph = alnum | punct; - _LIBCPP_ALWAYS_INLINE ctype_base() {} + _LIBCPP_INLINE_VISIBILITY ctype_base() {} }; template class _LIBCPP_TEMPLATE_VIS ctype; @@ -486,77 +482,77 @@ class _LIBCPP_TYPE_VIS ctype public: typedef wchar_t char_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit ctype(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool is(mask __m, char_type __c) const { return do_is(__m, __c); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* is(const char_type* __low, const char_type* __high, mask* __vec) const { return do_is(__low, __high, __vec); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* scan_is(mask __m, const char_type* __low, const char_type* __high) const { return do_scan_is(__m, __low, __high); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* scan_not(mask __m, const char_type* __low, const char_type* __high) const { return do_scan_not(__m, __low, __high); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY char_type toupper(char_type __c) const { return do_toupper(__c); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* toupper(char_type* __low, const char_type* __high) const { return do_toupper(__low, __high); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY char_type tolower(char_type __c) const { return do_tolower(__c); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* tolower(char_type* __low, const char_type* __high) const { return do_tolower(__low, __high); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY char_type widen(char __c) const { return do_widen(__c); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char* widen(const char* __low, const char* __high, char_type* __to) const { return do_widen(__low, __high, __to); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY char narrow(char_type __c, char __dfault) const { return do_narrow(__c, __dfault); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* narrow(const char_type* __low, const char_type* __high, char __dfault, char* __to) const { return do_narrow(__low, __high, __dfault, __to); @@ -591,13 +587,13 @@ public: explicit ctype(const mask* __tab = 0, bool __del = false, size_t __refs = 0); - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool is(mask __m, char_type __c) const { return isascii(__c) ? 
(__tab_[static_cast(__c)] & __m) !=0 : false; } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* is(const char_type* __low, const char_type* __high, mask* __vec) const { for (; __low != __high; ++__low, ++__vec) @@ -605,7 +601,7 @@ public: return __low; } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* scan_is (mask __m, const char_type* __low, const char_type* __high) const { for (; __low != __high; ++__low) @@ -614,7 +610,7 @@ public: return __low; } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* scan_not(mask __m, const char_type* __low, const char_type* __high) const { for (; __low != __high; ++__low) @@ -623,49 +619,49 @@ public: return __low; } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY char_type toupper(char_type __c) const { return do_toupper(__c); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* toupper(char_type* __low, const char_type* __high) const { return do_toupper(__low, __high); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY char_type tolower(char_type __c) const { return do_tolower(__c); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char_type* tolower(char_type* __low, const char_type* __high) const { return do_tolower(__low, __high); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY char_type widen(char __c) const { return do_widen(__c); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char* widen(const char* __low, const char* __high, char_type* __to) const { return do_widen(__low, __high, __to); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY char narrow(char_type __c, char __dfault) const { return do_narrow(__c, __dfault); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const char* narrow(const char_type* __low, const char_type* __high, char __dfault, char* __to) const { return do_narrow(__low, __high, __dfault, __to); @@ -678,7 +674,7 @@ public: #else static const size_t table_size = 256; // FIXME: Don't hardcode this. 
#endif - _LIBCPP_ALWAYS_INLINE const mask* table() const _NOEXCEPT {return __tab_;} + _LIBCPP_INLINE_VISIBILITY const mask* table() const _NOEXCEPT {return __tab_;} static const mask* classic_table() _NOEXCEPT; #if defined(__GLIBC__) || defined(__EMSCRIPTEN__) static const int* __classic_upper_table() _NOEXCEPT; @@ -858,7 +854,7 @@ tolower(_CharT __c, const locale& __loc) class _LIBCPP_TYPE_VIS codecvt_base { public: - _LIBCPP_ALWAYS_INLINE codecvt_base() {} + _LIBCPP_INLINE_VISIBILITY codecvt_base() {} enum result {ok, partial, error, noconv}; }; @@ -878,11 +874,11 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result out(state_type& __st, const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const @@ -890,14 +886,14 @@ public: return do_out(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result unshift(state_type& __st, extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const { return do_unshift(__st, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result in(state_type& __st, const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const @@ -905,25 +901,25 @@ public: return do_in(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int encoding() const _NOEXCEPT { return do_encoding(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool always_noconv() const _NOEXCEPT { return do_always_noconv(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int length(state_type& __st, const extern_type* __frm, const extern_type* __end, size_t __mx) const { return do_length(__st, __frm, __end, __mx); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int max_length() const _NOEXCEPT { return do_max_length(); @@ -932,7 +928,7 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt(const char*, size_t __refs = 0) : locale::facet(__refs) {} @@ -967,7 +963,7 @@ public: explicit codecvt(size_t __refs = 0); - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result out(state_type& __st, const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const @@ -975,14 +971,14 @@ public: return do_out(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result unshift(state_type& __st, extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const { return do_unshift(__st, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result in(state_type& __st, const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const @@ -990,25 +986,25 @@ public: return do_in(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int encoding() const _NOEXCEPT { return do_encoding(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool always_noconv() const 
_NOEXCEPT { return do_always_noconv(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int length(state_type& __st, const extern_type* __frm, const extern_type* __end, size_t __mx) const { return do_length(__st, __frm, __end, __mx); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int max_length() const _NOEXCEPT { return do_max_length(); @@ -1047,11 +1043,11 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result out(state_type& __st, const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const @@ -1059,14 +1055,14 @@ public: return do_out(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result unshift(state_type& __st, extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const { return do_unshift(__st, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result in(state_type& __st, const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const @@ -1074,25 +1070,25 @@ public: return do_in(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int encoding() const _NOEXCEPT { return do_encoding(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool always_noconv() const _NOEXCEPT { return do_always_noconv(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int length(state_type& __st, const extern_type* __frm, const extern_type* __end, size_t __mx) const { return do_length(__st, __frm, __end, __mx); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int max_length() const _NOEXCEPT { return do_max_length(); @@ -1101,7 +1097,7 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt(const char*, size_t __refs = 0) : locale::facet(__refs) {} @@ -1133,11 +1129,11 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result out(state_type& __st, const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const @@ -1145,14 +1141,14 @@ public: return do_out(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result unshift(state_type& __st, extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const { return do_unshift(__st, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY result in(state_type& __st, const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const @@ -1160,25 +1156,25 @@ public: return do_in(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int encoding() const _NOEXCEPT { return do_encoding(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool always_noconv() const _NOEXCEPT { return do_always_noconv(); } - 
_LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int length(state_type& __st, const extern_type* __frm, const extern_type* __end, size_t __mx) const { return do_length(__st, __frm, __end, __mx); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int max_length() const _NOEXCEPT { return do_max_length(); @@ -1187,7 +1183,7 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt(const char*, size_t __refs = 0) : locale::facet(__refs) {} @@ -1214,10 +1210,10 @@ class _LIBCPP_TEMPLATE_VIS codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> { public: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt_byname(const char* __nm, size_t __refs = 0) : codecvt<_InternT, _ExternT, _StateT>(__nm, __refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt_byname(const string& __nm, size_t __refs = 0) : codecvt<_InternT, _ExternT, _StateT>(__nm.c_str(), __refs) {} protected: @@ -1248,7 +1244,7 @@ template <> struct __narrow_to_utf8<8> { template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _OutputIterator operator()(_OutputIterator __s, const _CharT* __wb, const _CharT* __we) const { @@ -1262,13 +1258,13 @@ template <> struct __narrow_to_utf8<16> : public codecvt { - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY __narrow_to_utf8() : codecvt(1) {} ~__narrow_to_utf8(); template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _OutputIterator operator()(_OutputIterator __s, const _CharT* __wb, const _CharT* __we) const { @@ -1296,13 +1292,13 @@ template <> struct __narrow_to_utf8<32> : public codecvt { - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY __narrow_to_utf8() : codecvt(1) {} ~__narrow_to_utf8(); template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _OutputIterator operator()(_OutputIterator __s, const _CharT* __wb, const _CharT* __we) const { @@ -1338,7 +1334,7 @@ template <> struct __widen_from_utf8<8> { template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _OutputIterator operator()(_OutputIterator __s, const char* __nb, const char* __ne) const { @@ -1352,13 +1348,13 @@ template <> struct __widen_from_utf8<16> : public codecvt { - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY __widen_from_utf8() : codecvt(1) {} ~__widen_from_utf8(); template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _OutputIterator operator()(_OutputIterator __s, const char* __nb, const char* __ne) const { @@ -1386,13 +1382,13 @@ template <> struct __widen_from_utf8<32> : public codecvt { - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY __widen_from_utf8() : codecvt(1) {} ~__widen_from_utf8(); template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _OutputIterator operator()(_OutputIterator __s, const char* __nb, const char* __ne) const { @@ -1430,11 +1426,11 @@ public: explicit numpunct(size_t __refs = 0); - _LIBCPP_ALWAYS_INLINE char_type decimal_point() const {return do_decimal_point();} - _LIBCPP_ALWAYS_INLINE char_type thousands_sep() const {return do_thousands_sep();} - _LIBCPP_ALWAYS_INLINE string grouping() const {return do_grouping();} - _LIBCPP_ALWAYS_INLINE string_type truename() const {return do_truename();} - _LIBCPP_ALWAYS_INLINE string_type falsename() const {return do_falsename();} + _LIBCPP_INLINE_VISIBILITY char_type decimal_point() const {return do_decimal_point();} + _LIBCPP_INLINE_VISIBILITY char_type thousands_sep() const {return do_thousands_sep();} + _LIBCPP_INLINE_VISIBILITY string grouping() const {return do_grouping();} + 
_LIBCPP_INLINE_VISIBILITY string_type truename() const {return do_truename();} + _LIBCPP_INLINE_VISIBILITY string_type falsename() const {return do_falsename();} static locale::id id; @@ -1461,11 +1457,11 @@ public: explicit numpunct(size_t __refs = 0); - _LIBCPP_ALWAYS_INLINE char_type decimal_point() const {return do_decimal_point();} - _LIBCPP_ALWAYS_INLINE char_type thousands_sep() const {return do_thousands_sep();} - _LIBCPP_ALWAYS_INLINE string grouping() const {return do_grouping();} - _LIBCPP_ALWAYS_INLINE string_type truename() const {return do_truename();} - _LIBCPP_ALWAYS_INLINE string_type falsename() const {return do_falsename();} + _LIBCPP_INLINE_VISIBILITY char_type decimal_point() const {return do_decimal_point();} + _LIBCPP_INLINE_VISIBILITY char_type thousands_sep() const {return do_thousands_sep();} + _LIBCPP_INLINE_VISIBILITY string grouping() const {return do_grouping();} + _LIBCPP_INLINE_VISIBILITY string_type truename() const {return do_truename();} + _LIBCPP_INLINE_VISIBILITY string_type falsename() const {return do_falsename();} static locale::id id; diff --git a/include/c++/v1/__mutex_base b/include/c++/v1/__mutex_base index 402a52d..4659ca9 100644 --- a/include/c++/v1/__mutex_base +++ b/include/c++/v1/__mutex_base @@ -74,7 +74,7 @@ struct _LIBCPP_TYPE_VIS defer_lock_t {}; struct _LIBCPP_TYPE_VIS try_to_lock_t {}; struct _LIBCPP_TYPE_VIS adopt_lock_t {}; -#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_MUTEX) +#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) extern const defer_lock_t defer_lock; extern const try_to_lock_t try_to_lock; diff --git a/include/c++/v1/__node_handle b/include/c++/v1/__node_handle new file mode 100644 index 0000000..567f8b0 --- /dev/null +++ b/include/c++/v1/__node_handle @@ -0,0 +1,213 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NODE_HANDLE +#define _LIBCPP___NODE_HANDLE + +#include <__config> +#include <memory> +#include <optional> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +// FIXME: Uncomment this when we support the 'merge' functionality. +// #define __cpp_lib_node_extract 201606L + +// Specialized in __tree & __hash_table for their _NodeType. 
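
Editorial aside: `__basic_node_handle` below is the implementation machinery behind the standard C++17 node-handle API (`extract`/`insert` on the associative containers). The user-facing behavior it enables looks like this (standard API, nothing specific to this diff):

```cpp
#include <map>
#include <string>
#include <utility>

int main() {
    std::map<int, std::string> m{{1, "one"}, {2, "two"}};

    auto nh = m.extract(1);        // detaches the node; no copy, no reallocation
    if (!nh.empty()) {
        nh.key() = 3;              // the key is mutable through the handle
        m.insert(std::move(nh));   // re-inserts the same node without allocating
    }
    // m now contains {2, "two"} and {3, "one"}.
}
```
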
+template +struct __generic_container_node_destructor; + +template class _MapOrSetSpecifics> +class _LIBCPP_TEMPLATE_VIS __basic_node_handle + : public _MapOrSetSpecifics< + _NodeType, + __basic_node_handle<_NodeType, _Alloc, _MapOrSetSpecifics>> +{ + template + friend class __tree; + template + friend class __hash_table; + friend struct _MapOrSetSpecifics< + _NodeType, __basic_node_handle<_NodeType, _Alloc, _MapOrSetSpecifics>>; + + typedef allocator_traits<_Alloc> __alloc_traits; + typedef typename __rebind_pointer::type + __node_pointer_type; + +public: + typedef _Alloc allocator_type; + +private: + __node_pointer_type __ptr_ = nullptr; + optional __alloc_; + + _LIBCPP_INLINE_VISIBILITY + void __release() + { + __ptr_ = nullptr; + __alloc_ = _VSTD::nullopt; + } + + _LIBCPP_INLINE_VISIBILITY + void __destroy_node_pointer() + { + if (__ptr_ != nullptr) + { + typedef typename __allocator_traits_rebind< + allocator_type, _NodeType>::type __node_alloc_type; + __node_alloc_type __alloc(*__alloc_); + __generic_container_node_destructor<_NodeType, __node_alloc_type>( + __alloc, true)(__ptr_); + __ptr_ = nullptr; + } + } + + _LIBCPP_INLINE_VISIBILITY + __basic_node_handle(__node_pointer_type __ptr, + allocator_type const& __alloc) + : __ptr_(__ptr), __alloc_(__alloc) + { + } + +public: + _LIBCPP_INLINE_VISIBILITY + __basic_node_handle() = default; + + _LIBCPP_INLINE_VISIBILITY + __basic_node_handle(__basic_node_handle&& __other) noexcept + : __ptr_(__other.__ptr_), + __alloc_(_VSTD::move(__other.__alloc_)) + { + __other.__ptr_ = nullptr; + __other.__alloc_ = _VSTD::nullopt; + } + + _LIBCPP_INLINE_VISIBILITY + __basic_node_handle& operator=(__basic_node_handle&& __other) + { + _LIBCPP_ASSERT( + __alloc_ == _VSTD::nullopt || + __alloc_traits::propagate_on_container_move_assignment::value || + __alloc_ == __other.__alloc_, + "node_type with incompatible allocator passed to " + "node_type::operator=(node_type&&)"); + + __destroy_node_pointer(); + __ptr_ = __other.__ptr_; + + if (__alloc_traits::propagate_on_container_move_assignment::value || + __alloc_ == _VSTD::nullopt) + __alloc_ = _VSTD::move(__other.__alloc_); + + __other.__ptr_ = nullptr; + __other.__alloc_ = _VSTD::nullopt; + + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + allocator_type get_allocator() const { return *__alloc_; } + + _LIBCPP_INLINE_VISIBILITY + explicit operator bool() const { return __ptr_ != nullptr; } + + _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY + bool empty() const { return __ptr_ == nullptr; } + + _LIBCPP_INLINE_VISIBILITY + void swap(__basic_node_handle& __other) noexcept( + __alloc_traits::propagate_on_container_swap::value || + __alloc_traits::is_always_equal::value) + { + using _VSTD::swap; + swap(__ptr_, __other.__ptr_); + if (__alloc_traits::propagate_on_container_swap::value || + __alloc_ == _VSTD::nullopt || __other.__alloc_ == _VSTD::nullopt) + swap(__alloc_, __other.__alloc_); + } + + _LIBCPP_INLINE_VISIBILITY + friend void swap(__basic_node_handle& __a, __basic_node_handle& __b) + noexcept(noexcept(__a.swap(__b))) { __a.swap(__b); } + + _LIBCPP_INLINE_VISIBILITY + ~__basic_node_handle() + { + __destroy_node_pointer(); + } +}; + +template +struct __set_node_handle_specifics +{ + typedef typename _NodeType::__node_value_type value_type; + + _LIBCPP_INLINE_VISIBILITY + value_type& value() const + { + return static_cast<_Derived const*>(this)->__ptr_->__value_; + } +}; + +template +struct __map_node_handle_specifics +{ + typedef typename _NodeType::__node_value_type::key_type key_type; + 
typedef typename _NodeType::__node_value_type::mapped_type mapped_type; + + _LIBCPP_INLINE_VISIBILITY + key_type& key() const + { + return static_cast<_Derived const*>(this)-> + __ptr_->__value_.__ref().first; + } + + _LIBCPP_INLINE_VISIBILITY + mapped_type& mapped() const + { + return static_cast<_Derived const*>(this)-> + __ptr_->__value_.__ref().second; + } +}; + +template <class _NodeType, class _Alloc> +using __set_node_handle = + __basic_node_handle< _NodeType, _Alloc, __set_node_handle_specifics>; + +template <class _NodeType, class _Alloc> +using __map_node_handle = + __basic_node_handle< _NodeType, _Alloc, __map_node_handle_specifics>; + +template <class _Iterator, class _NodeType> +_LIBCPP_TEMPLATE_VIS +struct __insert_return_type +{ + _Iterator position; + bool inserted; + _NodeType node; +}; + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + +#endif diff --git a/include/c++/v1/__nullptr b/include/c++/v1/__nullptr index a341234..aa3b4d2 100644 --- a/include/c++/v1/__nullptr +++ b/include/c++/v1/__nullptr @@ -27,24 +27,24 @@ struct _LIBCPP_TEMPLATE_VIS nullptr_t struct __nat {int __for_bool_;}; - _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR nullptr_t() : __lx(0) {} - _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR nullptr_t(int __nat::*) : __lx(0) {} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR nullptr_t() : __lx(0) {} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR nullptr_t(int __nat::*) : __lx(0) {} - _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR operator int __nat::*() const {return 0;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR operator int __nat::*() const {return 0;} template <class _Tp> - _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR operator _Tp* () const {return 0;} template <class _Tp, class _Up> - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY operator _Tp _Up::* () const {return 0;} - friend _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR bool operator==(nullptr_t, nullptr_t) {return true;} - friend _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR bool operator!=(nullptr_t, nullptr_t) {return false;} + friend _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR bool operator==(nullptr_t, nullptr_t) {return true;} + friend _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR bool operator!=(nullptr_t, nullptr_t) {return false;} }; -inline _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR nullptr_t __get_nullptr_t() {return nullptr_t(0);} +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR nullptr_t __get_nullptr_t() {return nullptr_t(0);} #define nullptr _VSTD::__get_nullptr_t() diff --git a/include/c++/v1/__threading_support b/include/c++/v1/__threading_support index 3c1eff2..2024900 100644 --- a/include/c++/v1/__threading_support +++ b/include/c++/v1/__threading_support @@ -70,7 +70,7 @@ typedef pthread_t __libcpp_thread_id; typedef pthread_t __libcpp_thread_t; -// Thrad Local Storage +// Thread Local Storage typedef pthread_key_t __libcpp_tls_key; #define _LIBCPP_TLS_DESTRUCTOR_CC diff --git a/include/c++/v1/__tree b/include/c++/v1/__tree index 3ccfcb7..3b3586c 100644 --- a/include/c++/v1/__tree +++ b/include/c++/v1/__tree @@ -37,13 +37,8 @@ template <class _Pointer> class __tree_end_node; template <class _VoidPtr> class __tree_node_base; template <class _Tp, class _VoidPtr> class __tree_node; -#ifndef _LIBCPP_CXX03_LANG -template <class _Key, class _Tp> -union __value_type; -#else template <class _Key, class _Tp> struct __value_type; -#endif template <class _Tp, class _Compare, bool = is_empty<_Compare>::value && !__libcpp_is_final<_Compare>::value> @@ -569,10 +564,9 @@ struct __tree_key_value_types { static __container_value_type* __get_ptr(__node_value_type& __n) { return _VSTD::addressof(__n); } - #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - static __container_value_type&& __move(__node_value_type& __v) { + static 
__container_value_type&& __move(__node_value_type& __v) { return _VSTD::move(__v); } #endif @@ -584,14 +578,13 @@ struct __tree_key_value_types<__value_type<_Key, _Tp> > { typedef _Tp mapped_type; typedef __value_type<_Key, _Tp> __node_value_type; typedef pair<const _Key, _Tp> __container_value_type; - typedef pair<_Key, _Tp> __nc_value_type; typedef __container_value_type __map_value_type; static const bool __is_map = true; _LIBCPP_INLINE_VISIBILITY static key_type const& __get_key(__node_value_type const& __t) { - return __t.__cc.first; + return __t.__get_value().first; } template <class _Up> @@ -605,7 +598,7 @@ struct __tree_key_value_types<__value_type<_Key, _Tp> > { _LIBCPP_INLINE_VISIBILITY static __container_value_type const& __get_value(__node_value_type const& __t) { - return __t.__cc; + return __t.__get_value(); } template <class _Up> @@ -618,13 +611,13 @@ struct __tree_key_value_types<__value_type<_Key, _Tp> > { _LIBCPP_INLINE_VISIBILITY static __container_value_type* __get_ptr(__node_value_type& __n) { - return _VSTD::addressof(__n.__cc); + return _VSTD::addressof(__n.__get_value()); } #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - static __nc_value_type&& __move(__node_value_type& __v) { - return _VSTD::move(__v.__nc); + static pair<key_type&&, mapped_type&&> __move(__node_value_type& __v) { + return __v.__move(); } #endif }; @@ -803,6 +796,16 @@ public: template <class> friend class __map_node_destructor; }; +#if _LIBCPP_STD_VER > 14 +template <class _NodeType, class _Alloc> +struct __generic_container_node_destructor; +template <class _Tp, class _VoidPtr, class _Alloc> +struct __generic_container_node_destructor<__tree_node<_Tp, _VoidPtr>, _Alloc> + : __tree_node_destructor<_Alloc> +{ + using __tree_node_destructor<_Alloc>::__tree_node_destructor; +}; +#endif template <class _Tp, class _NodePtr, class _DiffType> class _LIBCPP_TEMPLATE_VIS __tree_iterator @@ -854,7 +857,7 @@ public: __tree_iterator operator--(int) {__tree_iterator __t(*this); --(*this); return __t;} - friend _LIBCPP_INLINE_VISIBILITY + friend _LIBCPP_INLINE_VISIBILITY bool operator==(const __tree_iterator& __x, const __tree_iterator& __y) {return __x.__ptr_ == __y.__ptr_;} friend _LIBCPP_INLINE_VISIBILITY @@ -1345,6 +1348,33 @@ public: iterator __node_insert_multi(__node_pointer __nd); iterator __node_insert_multi(const_iterator __p, __node_pointer __nd); + + _LIBCPP_INLINE_VISIBILITY iterator __remove_node_pointer(__node_pointer); + +#if _LIBCPP_STD_VER > 14 + template <class _NodeHandle, class _InsertReturnType> + _LIBCPP_INLINE_VISIBILITY + _InsertReturnType __node_handle_insert_unique(_NodeHandle&&); + template <class _NodeHandle> + _LIBCPP_INLINE_VISIBILITY + iterator __node_handle_insert_unique(const_iterator, _NodeHandle&&); + + template <class _NodeHandle> + _LIBCPP_INLINE_VISIBILITY + iterator __node_handle_insert_multi(_NodeHandle&&); + template <class _NodeHandle> + _LIBCPP_INLINE_VISIBILITY + iterator __node_handle_insert_multi(const_iterator, _NodeHandle&&); + + + template <class _NodeHandle> + _LIBCPP_INLINE_VISIBILITY + _NodeHandle __node_handle_extract(key_type const&); + template <class _NodeHandle> + _LIBCPP_INLINE_VISIBILITY + _NodeHandle __node_handle_extract(const_iterator); +#endif + iterator erase(const_iterator __p); iterator erase(const_iterator __f, const_iterator __l); template <class _Key> @@ -1458,7 +1488,7 @@ private: void __copy_assign_alloc(const __tree& __t, true_type) { if (__node_alloc() != __t.__node_alloc()) - clear(); + clear(); __node_alloc() = __t.__node_alloc(); } _LIBCPP_INLINE_VISIBILITY @@ -1800,7 +1830,7 @@ __tree<_Tp, _Compare, _Allocator>::operator=(__tree&& __t) __node_traits::propagate_on_container_move_assignment::value && is_nothrow_move_assignable<value_compare>::value && is_nothrow_move_assignable<__node_allocator>::value) - + { __move_assign(__t, integral_constant<bool, __node_traits::propagate_on_container_move_assignment::value>()); @@ -2354,17 +2384,138 @@ __tree<_Tp, 
_Compare, _Allocator>::__node_insert_multi(const_iterator __p, template <class _Tp, class _Compare, class _Allocator> typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::erase(const_iterator __p) +__tree<_Tp, _Compare, _Allocator>::__remove_node_pointer(__node_pointer __ptr) { - __node_pointer __np = __p.__get_np(); - iterator __r(__p.__ptr_); + iterator __r(__ptr); ++__r; - if (__begin_node() == __p.__ptr_) + if (__begin_node() == __ptr) __begin_node() = __r.__ptr_; --size(); - __node_allocator& __na = __node_alloc(); __tree_remove(__end_node()->__left_, - static_cast<__node_base_pointer>(__np)); + static_cast<__node_base_pointer>(__ptr)); + return __r; +} + +#if _LIBCPP_STD_VER > 14 +template <class _Tp, class _Compare, class _Allocator> +template <class _NodeHandle, class _InsertReturnType> +_LIBCPP_INLINE_VISIBILITY +_InsertReturnType +__tree<_Tp, _Compare, _Allocator>::__node_handle_insert_unique( + _NodeHandle&& __nh) +{ + if (__nh.empty()) + return _InsertReturnType{end(), false, _NodeHandle()}; + + __node_pointer __ptr = __nh.__ptr_; + __parent_pointer __parent; + __node_base_pointer& __child = __find_equal(__parent, + __ptr->__value_); + if (__child != nullptr) + return _InsertReturnType{ + iterator(static_cast<__node_pointer>(__child)), + false, _VSTD::move(__nh)}; + + __insert_node_at(__parent, __child, + static_cast<__node_base_pointer>(__ptr)); + __nh.__release(); + return _InsertReturnType{iterator(__ptr), true, _NodeHandle()}; +} + +template <class _Tp, class _Compare, class _Allocator> +template <class _NodeHandle> +_LIBCPP_INLINE_VISIBILITY +typename __tree<_Tp, _Compare, _Allocator>::iterator +__tree<_Tp, _Compare, _Allocator>::__node_handle_insert_unique( + const_iterator __hint, _NodeHandle&& __nh) +{ + if (__nh.empty()) + return end(); + + __node_pointer __ptr = __nh.__ptr_; + __parent_pointer __parent; + __node_base_pointer __dummy; + __node_base_pointer& __child = __find_equal(__hint, __parent, __dummy, + __ptr->__value_); + __node_pointer __r = static_cast<__node_pointer>(__child); + if (__child == nullptr) + { + __insert_node_at(__parent, __child, + static_cast<__node_base_pointer>(__ptr)); + __r = __ptr; + __nh.__release(); + } + return iterator(__r); +} + +template <class _Tp, class _Compare, class _Allocator> +template <class _NodeHandle> +_LIBCPP_INLINE_VISIBILITY +_NodeHandle +__tree<_Tp, _Compare, _Allocator>::__node_handle_extract(key_type const& __key) +{ + iterator __it = find(__key); + if (__it == end()) + return _NodeHandle(); + return __node_handle_extract<_NodeHandle>(__it); +} + +template <class _Tp, class _Compare, class _Allocator> +template <class _NodeHandle> +_LIBCPP_INLINE_VISIBILITY +_NodeHandle +__tree<_Tp, _Compare, _Allocator>::__node_handle_extract(const_iterator __p) +{ + __node_pointer __np = __p.__get_np(); + __remove_node_pointer(__np); + return _NodeHandle(__np, __alloc()); +} + +template <class _Tp, class _Compare, class _Allocator> +template <class _NodeHandle> +_LIBCPP_INLINE_VISIBILITY +typename __tree<_Tp, _Compare, _Allocator>::iterator +__tree<_Tp, _Compare, _Allocator>::__node_handle_insert_multi(_NodeHandle&& __nh) +{ + if (__nh.empty()) + return end(); + __node_pointer __ptr = __nh.__ptr_; + __parent_pointer __parent; + __node_base_pointer& __child = __find_leaf_high( + __parent, _NodeTypes::__get_key(__ptr->__value_)); + __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr)); + __nh.__release(); + return iterator(__ptr); +} + +template <class _Tp, class _Compare, class _Allocator> +template <class _NodeHandle> +_LIBCPP_INLINE_VISIBILITY +typename __tree<_Tp, _Compare, _Allocator>::iterator +__tree<_Tp, _Compare, _Allocator>::__node_handle_insert_multi( + const_iterator __hint, _NodeHandle&& __nh) +{ + if (__nh.empty()) + return end(); + + __node_pointer __ptr = __nh.__ptr_; + __parent_pointer __parent; + __node_base_pointer& __child = __find_leaf(__hint, __parent, + _NodeTypes::__get_key(__ptr->__value_)); + 
__insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr)); + __nh.__release(); + return iterator(__ptr); +} + +#endif // _LIBCPP_STD_VER > 14 + +template <class _Tp, class _Compare, class _Allocator> +typename __tree<_Tp, _Compare, _Allocator>::iterator +__tree<_Tp, _Compare, _Allocator>::erase(const_iterator __p) +{ + __node_pointer __np = __p.__get_np(); + iterator __r = __remove_node_pointer(__np); + __node_allocator& __na = __node_alloc(); __node_traits::destroy(__na, _NodeTypes::__get_ptr( const_cast<__node_value_type&>(*__p))); __node_traits::deallocate(__na, __np, 1); diff --git a/include/c++/v1/algorithm b/include/c++/v1/algorithm index a94b07b..9ce6aa0 100644 --- a/include/c++/v1/algorithm +++ b/include/c++/v1/algorithm @@ -645,13 +645,8 @@ template #include #include #include - -#if defined(__IBMCPP__) -#include "support/ibm/support.h" -#endif -#if defined(_LIBCPP_COMPILER_MSVC) -#include <intrin.h> -#endif +#include <bit> +#include <version> #include <__debug> @@ -671,10 +666,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _T1, class _T2 = _T1> struct __equal_to { - _LIBCPP_INLINE_VISIBILITY bool operator()(const _T1& __x, const _T1& __y) const {return __x == __y;} - _LIBCPP_INLINE_VISIBILITY bool operator()(const _T1& __x, const _T2& __y) const {return __x == __y;} - _LIBCPP_INLINE_VISIBILITY bool operator()(const _T2& __x, const _T1& __y) const {return __x == __y;} - _LIBCPP_INLINE_VISIBILITY bool operator()(const _T2& __x, const _T2& __y) const {return __x == __y;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 bool operator()(const _T1& __x, const _T1& __y) const {return __x == __y;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 bool operator()(const _T1& __x, const _T2& __y) const {return __x == __y;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 bool operator()(const _T2& __x, const _T1& __y) const {return __x == __y;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 bool operator()(const _T2& __x, const _T2& __y) const {return __x == __y;} }; template <class _T1, class _T2 = _T1> @@ -788,135 +783,6 @@ struct __debug_less #endif // _LIBCPP_DEBUG -// Precondition: __x != 0 -inline _LIBCPP_INLINE_VISIBILITY -unsigned __ctz(unsigned __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return static_cast<unsigned>(__builtin_ctz(__x)); -#else - static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); - static_assert(sizeof(unsigned long) == 4, ""); - unsigned long where; - // Search from LSB to MSB for first set bit. - // Returns zero if no set bit is found. - if (_BitScanForward(&where, __x)) - return where; - return 32; -#endif -} - -inline _LIBCPP_INLINE_VISIBILITY -unsigned long __ctz(unsigned long __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return static_cast<unsigned long>(__builtin_ctzl(__x)); -#else - static_assert(sizeof(unsigned long) == sizeof(unsigned), ""); - return __ctz(static_cast<unsigned>(__x)); -#endif -} - -inline _LIBCPP_INLINE_VISIBILITY -unsigned long long __ctz(unsigned long long __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return static_cast<unsigned long long>(__builtin_ctzll(__x)); -#else - unsigned long where; -// Search from LSB to MSB for first set bit. -// Returns zero if no set bit is found. -#if defined(_LIBCPP_HAS_BITSCAN64) && (defined(_M_AMD64) || defined(__x86_64__)) - if (_BitScanForward64(&where, __x)) - return static_cast<unsigned long long>(where); -#else - // Win32 doesn't have _BitScanForward64 so emulate it with two 32 bit calls. - // Scan the Low Word. - if (_BitScanForward(&where, static_cast<unsigned long>(__x))) - return where; - // Scan the High Word. - if (_BitScanForward(&where, static_cast<unsigned long>(__x >> 32))) - return where + 32; // Create a bit offset from the LSB. 
-#endif - return 64; -#endif // _LIBCPP_COMPILER_MSVC -} - -// Precondition: __x != 0 -inline _LIBCPP_INLINE_VISIBILITY -unsigned __clz(unsigned __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return static_cast<unsigned>(__builtin_clz(__x)); -#else - static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); - static_assert(sizeof(unsigned long) == 4, ""); - unsigned long where; - // Search from LSB to MSB for first set bit. - // Returns zero if no set bit is found. - if (_BitScanReverse(&where, __x)) - return 31 - where; - return 32; // Undefined Behavior. -#endif -} - -inline _LIBCPP_INLINE_VISIBILITY -unsigned long __clz(unsigned long __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return static_cast<unsigned long>(__builtin_clzl (__x)); -#else - static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); - return __clz(static_cast<unsigned>(__x)); -#endif -} - -inline _LIBCPP_INLINE_VISIBILITY -unsigned long long __clz(unsigned long long __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return static_cast<unsigned long long>(__builtin_clzll(__x)); -#else - unsigned long where; -// BitScanReverse scans from MSB to LSB for first set bit. -// Returns 0 if no set bit is found. -#if defined(_LIBCPP_HAS_BITSCAN64) - if (_BitScanReverse64(&where, __x)) - return static_cast<unsigned long long>(63 - where); -#else - // Scan the high 32 bits. - if (_BitScanReverse(&where, static_cast<unsigned long>(__x >> 32))) - return 63 - (where + 32); // Create a bit offset from the MSB. - // Scan the low 32 bits. - if (_BitScanReverse(&where, static_cast<unsigned long>(__x))) - return 63 - where; -#endif - return 64; // Undefined Behavior. -#endif // _LIBCPP_COMPILER_MSVC -} - -inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return __builtin_popcount (__x); -#else - static_assert(sizeof(unsigned) == 4, ""); - return __popcnt(__x); -#endif -} - -inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned long __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return __builtin_popcountl (__x); -#else - static_assert(sizeof(unsigned long) == 4, ""); - return __popcnt(__x); -#endif -} - -inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned long long __x) { -#ifndef _LIBCPP_COMPILER_MSVC - return __builtin_popcountll(__x); -#else - static_assert(sizeof(unsigned long long) == 8, ""); - return __popcnt64(__x); -#endif -} - // all_of template <class _InputIterator, class _Predicate> @@ -1708,7 +1574,7 @@ __unwrap_iter(move_iterator<_Tp*> __i) #if _LIBCPP_DEBUG_LEVEL < 2 template <class _Tp> -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1722,7 +1588,7 @@ __unwrap_iter(__wrap_iter<_Tp*> __i) #else template <class _Tp> -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -2533,6 +2399,8 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { + static_assert(__is_forward_iterator<_ForwardIterator>::value, + "std::min_element requires a ForwardIterator"); if (__first != __last) { _ForwardIterator __i = __first; @@ -2597,6 +2465,8 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { + static_assert(__is_forward_iterator<_ForwardIterator>::value, + "std::max_element requires a ForwardIterator"); if (__first != __last) { _ForwardIterator __i = __first; @@ -2683,6 +2553,8 @@ _LIBCPP_CONSTEXPR_AFTER_CXX11 
std::pair<_ForwardIterator, _ForwardIterator> minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { + static_assert(__is_forward_iterator<_ForwardIterator>::value, + "std::minmax_element requires a ForwardIterator"); std::pair<_ForwardIterator, _ForwardIterator> __result(__first, __first); if (__first != __last) { @@ -3027,10 +2899,11 @@ template template typename uniform_int_distribution<_IntType>::result_type uniform_int_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p) +_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK { typedef typename conditional::type _UIntType; - const _UIntType _Rp = __p.b() - __p.a() + _UIntType(1); + const _UIntType _Rp = _UIntType(__p.b()) - _UIntType(__p.a()) + _UIntType(1); if (_Rp == 1) return __p.a(); const size_t _Dt = numeric_limits<_UIntType>::digits; @@ -3080,7 +2953,7 @@ public: _LIBCPP_FUNC_VIS __rs_default __rs_get(); template -void +_LIBCPP_DEPRECATED_IN_CXX14 void random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last) { typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -3101,7 +2974,7 @@ random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last) } template -void +_LIBCPP_DEPRECATED_IN_CXX14 void random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, #ifndef _LIBCPP_CXX03_LANG _RandomNumberGenerator&& __rand) @@ -3116,7 +2989,8 @@ random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, for (--__last; __first < __last; ++__first, --__d) { difference_type __i = __rand(__d); - swap(*__first, *(__first + __i)); + if (__i != difference_type(0)) + swap(*__first, *(__first + __i)); } } } diff --git a/include/c++/v1/any b/include/c++/v1/any index d7161b0..e2945bd 100644 --- a/include/c++/v1/any +++ b/include/c++/v1/any @@ -87,6 +87,7 @@ namespace std { #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -104,13 +105,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER > 14 -_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY void __throw_bad_any_cast() { #ifndef _LIBCPP_NO_EXCEPTIONS throw bad_any_cast(); #else - _VSTD::abort(); + _VSTD::abort(); #endif } @@ -304,7 +305,7 @@ private: __any_imp::_Buffer __buf; }; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void * __call(_Action __a, any * __other = nullptr, type_info const * __info = nullptr, const void* __fallback_info = nullptr) const @@ -312,7 +313,7 @@ private: return __h(__a, this, __other, __info, __fallback_info); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void * __call(_Action __a, any * __other = nullptr, type_info const * __info = nullptr, const void* __fallback_info = nullptr) diff --git a/include/c++/v1/array b/include/c++/v1/array index fdb1f9d..8f4e111 100644 --- a/include/c++/v1/array +++ b/include/c++/v1/array @@ -72,6 +72,9 @@ struct array const T* data() const noexcept; }; + template + array(T, U...) 
-> array<T, 1 + sizeof...(U)>; + template <class T, size_t N> bool operator==(const array<T, N>& x, const array<T, N>& y); template <class T, size_t N> @@ -86,7 +89,7 @@ template <class T, size_t N> bool operator>=(const array<T, N>& x, const array<T, N>& y); template <class T, size_t N> - void swap(array<T, N>& x, array<T, N>& y) noexcept(noexcept(x.swap(y))); + void swap(array<T, N>& x, array<T, N>& y) noexcept(noexcept(x.swap(y))); // C++17 template <class T> class tuple_size; template <size_t I, class T> class tuple_element; @@ -109,6 +112,7 @@ template <class T, size_t N> const T&& get(const array<T, N>&&) noexcept; #include #include #include <cstdlib> // for _LIBCPP_UNREACHABLE +#include <version> #include <__debug> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -354,9 +358,17 @@ struct _LIBCPP_TEMPLATE_VIS array<_Tp, 0> }; +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template<class _Tp, class... _Args, + class = typename enable_if<(is_same_v<_Tp, _Args> && ...), void>::type + > +array(_Tp, _Args...) + -> array<_Tp, 1 + sizeof...(_Args)>; +#endif + template <class _Tp, size_t _Size> inline _LIBCPP_INLINE_VISIBILITY -bool +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool operator==(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) { return _VSTD::equal(__x.begin(), __x.end(), __y.begin()); @@ -364,7 +376,7 @@ operator==(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) template <class _Tp, size_t _Size> inline _LIBCPP_INLINE_VISIBILITY -bool +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool operator!=(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) { return !(__x == __y); @@ -372,7 +384,7 @@ operator!=(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) template <class _Tp, size_t _Size> inline _LIBCPP_INLINE_VISIBILITY -bool +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool operator<(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) { return _VSTD::lexicographical_compare(__x.begin(), __x.end(), @@ -381,7 +393,7 @@ operator<(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) template <class _Tp, size_t _Size> inline _LIBCPP_INLINE_VISIBILITY -bool +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool operator>(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) { return __y < __x; @@ -389,7 +401,7 @@ operator>(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) template <class _Tp, size_t _Size> inline _LIBCPP_INLINE_VISIBILITY -bool +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool operator<=(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) { return !(__y < __x); @@ -397,7 +409,7 @@ operator<=(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) template <class _Tp, size_t _Size> inline _LIBCPP_INLINE_VISIBILITY -bool +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool operator>=(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) { return !(__x < __y); diff --git a/include/c++/v1/atomic b/include/c++/v1/atomic index f55e28f..d37e7b4 100644 --- a/include/c++/v1/atomic +++ b/include/c++/v1/atomic @@ -544,6 +544,7 @@ void atomic_signal_fence(memory_order m) noexcept; #include <cstddef> #include <cstdint> #include <type_traits> +#include <version> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -555,9 +556,8 @@ void atomic_signal_fence(memory_order m) noexcept; #if !defined(_LIBCPP_HAS_C_ATOMIC_IMP) && !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) #error <atomic> is not implemented #endif - -#if _LIBCPP_STD_VER > 14 -# define __cpp_lib_atomic_is_always_lock_free 201603L +#ifdef kill_dependency +#error C++ standard library is incompatible with <stdatomic.h> #endif #define _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) \ @@ -695,7 +695,7 @@ static inline void __c11_atomic_store(_Atomic(_Tp)* __a, _Tp __val, } template <class _Tp> -static inline _Tp __c11_atomic_load(volatile _Atomic(_Tp)* __a, +static inline _Tp __c11_atomic_load(const volatile _Atomic(_Tp)* __a, memory_order __order) { _Tp __ret; __atomic_load(&__a->__a_value, &__ret, @@ -704,7 +704,7 @@ static inline _Tp __c11_atomic_load(volatile _Atomic(_Tp)* __a, template <class _Tp> -static inline _Tp __c11_atomic_load(_Atomic(_Tp)* __a, 
memory_order __order) { +static inline _Tp __c11_atomic_load(const _Atomic(_Tp)* __a, memory_order __order) { _Tp __ret; __atomic_load(&__a->__a_value, &__ret, __gcc_atomic::__to_gcc_order(__order)); @@ -1741,7 +1741,7 @@ typedef struct atomic_flag atomic_flag() _NOEXCEPT : __a_() {} #endif // _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR atomic_flag(bool __b) _NOEXCEPT : __a_(__b) {} // EXTENSION #ifndef _LIBCPP_CXX03_LANG diff --git a/include/c++/v1/bit b/include/c++/v1/bit new file mode 100644 index 0000000..db3812e --- /dev/null +++ b/include/c++/v1/bit @@ -0,0 +1,158 @@ +// -*- C++ -*- +//===------------------------------ bit ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP_BIT +#define _LIBCPP_BIT + +/* + bit synopsis + +namespace std { + +} // namespace std + +*/ + +#include <__config> +#include <version> + +#if defined(__IBMCPP__) +#include "support/ibm/support.h" +#endif +#if defined(_LIBCPP_COMPILER_MSVC) +#include <intrin.h> +#endif + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#ifndef _LIBCPP_COMPILER_MSVC + +inline _LIBCPP_INLINE_VISIBILITY +int __ctz(unsigned __x) { return __builtin_ctz(__x); } + +inline _LIBCPP_INLINE_VISIBILITY +int __ctz(unsigned long __x) { return __builtin_ctzl(__x); } + +inline _LIBCPP_INLINE_VISIBILITY +int __ctz(unsigned long long __x) { return __builtin_ctzll(__x); } + + +inline _LIBCPP_INLINE_VISIBILITY +int __clz(unsigned __x) { return __builtin_clz(__x); } + +inline _LIBCPP_INLINE_VISIBILITY +int __clz(unsigned long __x) { return __builtin_clzl(__x); } + +inline _LIBCPP_INLINE_VISIBILITY +int __clz(unsigned long long __x) { return __builtin_clzll(__x); } + + +inline _LIBCPP_INLINE_VISIBILITY +int __popcount(unsigned __x) { return __builtin_popcount(__x); } + +inline _LIBCPP_INLINE_VISIBILITY +int __popcount(unsigned long __x) { return __builtin_popcountl(__x); } + +inline _LIBCPP_INLINE_VISIBILITY +int __popcount(unsigned long long __x) { return __builtin_popcountll(__x); } + +#else // _LIBCPP_COMPILER_MSVC + +// Precondition: __x != 0 +inline _LIBCPP_INLINE_VISIBILITY +int __ctz(unsigned __x) { + static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); + static_assert(sizeof(unsigned long) == 4, ""); + unsigned long __where; + if (_BitScanForward(&__where, __x)) + return static_cast<int>(__where); + return 32; +} + +inline _LIBCPP_INLINE_VISIBILITY +int __ctz(unsigned long __x) { + static_assert(sizeof(unsigned long) == sizeof(unsigned), ""); + return __ctz(static_cast<unsigned>(__x)); +} + +inline _LIBCPP_INLINE_VISIBILITY +int __ctz(unsigned long long __x) { + unsigned long __where; +#if defined(_LIBCPP_HAS_BITSCAN64) && (defined(_M_AMD64) || defined(__x86_64__)) + if (_BitScanForward64(&__where, __x)) + return static_cast<int>(__where); +#else + // Win32 doesn't have _BitScanForward64 so emulate it with two 32 bit calls. 
+ if (_BitScanForward(&__where, static_cast<unsigned long>(__x))) + return static_cast<int>(__where); + if (_BitScanForward(&__where, static_cast<unsigned long>(__x >> 32))) + return static_cast<int>(__where + 32); +#endif + return 64; +} + +// Precondition: __x != 0 +inline _LIBCPP_INLINE_VISIBILITY +int __clz(unsigned __x) { + static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); + static_assert(sizeof(unsigned long) == 4, ""); + unsigned long __where; + if (_BitScanReverse(&__where, __x)) + return static_cast<int>(31 - __where); + return 32; // Undefined Behavior. +} + +inline _LIBCPP_INLINE_VISIBILITY +int __clz(unsigned long __x) { + static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); + return __clz(static_cast<unsigned>(__x)); +} + +inline _LIBCPP_INLINE_VISIBILITY +int __clz(unsigned long long __x) { + unsigned long __where; +#if defined(_LIBCPP_HAS_BITSCAN64) + if (_BitScanReverse64(&__where, __x)) + return static_cast<int>(63 - __where); +#else + // Win32 doesn't have _BitScanReverse64 so emulate it with two 32 bit calls. + if (_BitScanReverse(&__where, static_cast<unsigned long>(__x >> 32))) + return static_cast<int>(63 - (__where + 32)); + if (_BitScanReverse(&__where, static_cast<unsigned long>(__x))) + return static_cast<int>(63 - __where); +#endif + return 64; // Undefined Behavior. +} + +inline _LIBCPP_INLINE_VISIBILITY int __popcount(unsigned __x) { + static_assert(sizeof(unsigned) == 4, ""); + return __popcnt(__x); +} + +inline _LIBCPP_INLINE_VISIBILITY int __popcount(unsigned long __x) { + static_assert(sizeof(unsigned long) == 4, ""); + return __popcnt(__x); +} + +inline _LIBCPP_INLINE_VISIBILITY int __popcount(unsigned long long __x) { + static_assert(sizeof(unsigned long long) == 8, ""); + return __popcnt64(__x); +} + +#endif // _LIBCPP_COMPILER_MSVC + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_BIT diff --git a/include/c++/v1/bitset b/include/c++/v1/bitset index cd6c289..6e28596 100644 --- a/include/c++/v1/bitset +++ b/include/c++/v1/bitset @@ -238,9 +238,9 @@ __bitset<_N_words, _Size>::__init(unsigned long long __v, false_type) _NOEXCEPT size_t __sz = _Size; for (size_t __i = 0; __i < sizeof(__t)/sizeof(__t[0]); ++__i, __v >>= __bits_per_word, __sz -= __bits_per_word ) if ( __sz < __bits_per_word) - __t[__i] = static_cast<__storage_type>(__v) & ( 1ULL << __sz ) - 1; + __t[__i] = static_cast<__storage_type>(__v) & ( 1ULL << __sz ) - 1; else - __t[__i] = static_cast<__storage_type>(__v); + __t[__i] = static_cast<__storage_type>(__v); _VSTD::copy(__t, __t + sizeof(__t)/sizeof(__t[0]), __first_); _VSTD::fill(__first_ + sizeof(__t)/sizeof(__t[0]), __first_ + sizeof(__first_)/sizeof(__first_[0]), @@ -254,7 +254,7 @@ __bitset<_N_words, _Size>::__init(unsigned long long __v, true_type) _NOEXCEPT { __first_[0] = __v; if (_Size < __bits_per_word) - __first_[0] &= ( 1ULL << _Size ) - 1; + __first_[0] &= ( 1ULL << _Size ) - 1; _VSTD::fill(__first_ + 1, __first_ + sizeof(__first_)/sizeof(__first_[0]), __storage_type(0)); } @@ -269,9 +269,9 @@ __bitset<_N_words, _Size>::__bitset(unsigned long long __v) _NOEXCEPT #if __SIZEOF_SIZE_T__ == 8 : __first_{__v} #elif __SIZEOF_SIZE_T__ == 4 - : __first_{static_cast<__storage_type>(__v), - _Size >= 2 * __bits_per_word ? static_cast<__storage_type>(__v >> __bits_per_word) - : static_cast<__storage_type>((__v >> __bits_per_word) & (__storage_type(1) << (_Size - __bits_per_word)) - 1)} + : __first_{static_cast<__storage_type>(__v), + _Size >= 2 * __bits_per_word ? 
static_cast<__storage_type>(__v >> __bits_per_word) + : static_cast<__storage_type>((__v >> __bits_per_word) & (__storage_type(1) << (_Size - __bits_per_word)) - 1)} #else #error This constructor has not been ported to this platform #endif diff --git a/include/c++/v1/cfloat b/include/c++/v1/cfloat index 176fa9d..0abe84b 100644 --- a/include/c++/v1/cfloat +++ b/include/c++/v1/cfloat @@ -20,11 +20,18 @@ Macros: FLT_EVAL_METHOD // C99 FLT_RADIX + FLT_HAS_SUBNORM // C11 + DBL_HAS_SUBNORM // C11 + LDBL_HAS_SUBNORM // C11 + FLT_MANT_DIG DBL_MANT_DIG LDBL_MANT_DIG DECIMAL_DIG // C99 + FLT_DECIMAL_DIG // C11 + DBL_DECIMAL_DIG // C11 + LDBL_DECIMAL_DIG // C11 FLT_DIG DBL_DIG @@ -58,6 +65,9 @@ Macros: DBL_MIN LDBL_MIN + FLT_TRUE_MIN // C11 + DBL_TRUE_MIN // C11 + LDBL_TRUE_MIN // C11 */ #include <__config> diff --git a/include/c++/v1/charconv b/include/c++/v1/charconv new file mode 100644 index 0000000..7cb790e --- /dev/null +++ b/include/c++/v1/charconv @@ -0,0 +1,610 @@ +// -*- C++ -*- +//===------------------------------ charconv ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_CHARCONV +#define _LIBCPP_CHARCONV + +/* + charconv synopsis + +namespace std { + + // floating-point format for primitive numerical conversion + enum class chars_format { + scientific = unspecified, + fixed = unspecified, + hex = unspecified, + general = fixed | scientific + }; + + // 23.20.2, primitive numerical output conversion + struct to_chars_result { + char* ptr; + errc ec; + }; + + to_chars_result to_chars(char* first, char* last, see below value, + int base = 10); + + to_chars_result to_chars(char* first, char* last, float value); + to_chars_result to_chars(char* first, char* last, double value); + to_chars_result to_chars(char* first, char* last, long double value); + + to_chars_result to_chars(char* first, char* last, float value, + chars_format fmt); + to_chars_result to_chars(char* first, char* last, double value, + chars_format fmt); + to_chars_result to_chars(char* first, char* last, long double value, + chars_format fmt); + + to_chars_result to_chars(char* first, char* last, float value, + chars_format fmt, int precision); + to_chars_result to_chars(char* first, char* last, double value, + chars_format fmt, int precision); + to_chars_result to_chars(char* first, char* last, long double value, + chars_format fmt, int precision); + + // 23.20.3, primitive numerical input conversion + struct from_chars_result { + const char* ptr; + errc ec; + }; + + from_chars_result from_chars(const char* first, const char* last, + see below& value, int base = 10); + + from_chars_result from_chars(const char* first, const char* last, + float& value, + chars_format fmt = chars_format::general); + from_chars_result from_chars(const char* first, const char* last, + double& value, + chars_format fmt = chars_format::general); + from_chars_result from_chars(const char* first, const char* last, + long double& value, + chars_format fmt = chars_format::general); + +} // namespace std + +*/ + +#include <__errc> +#include +#include +#include +#include +#include + +#include <__debug> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 11 + +enum class _LIBCPP_ENUM_VIS chars_format +{ 
+ scientific = 0x1, + fixed = 0x2, + hex = 0x4, + general = fixed | scientific +}; + +struct _LIBCPP_TYPE_VIS to_chars_result +{ + char* ptr; + errc ec; +}; + +struct _LIBCPP_TYPE_VIS from_chars_result +{ + const char* ptr; + errc ec; +}; + +void to_chars(char*, char*, bool, int = 10) = delete; +void from_chars(const char*, const char*, bool, int = 10) = delete; + +namespace __itoa +{ + +static constexpr uint64_t __pow10_64[] = { + UINT64_C(0), + UINT64_C(10), + UINT64_C(100), + UINT64_C(1000), + UINT64_C(10000), + UINT64_C(100000), + UINT64_C(1000000), + UINT64_C(10000000), + UINT64_C(100000000), + UINT64_C(1000000000), + UINT64_C(10000000000), + UINT64_C(100000000000), + UINT64_C(1000000000000), + UINT64_C(10000000000000), + UINT64_C(100000000000000), + UINT64_C(1000000000000000), + UINT64_C(10000000000000000), + UINT64_C(100000000000000000), + UINT64_C(1000000000000000000), + UINT64_C(10000000000000000000), +}; + +static constexpr uint32_t __pow10_32[] = { + UINT32_C(0), UINT32_C(10), UINT32_C(100), + UINT32_C(1000), UINT32_C(10000), UINT32_C(100000), + UINT32_C(1000000), UINT32_C(10000000), UINT32_C(100000000), + UINT32_C(1000000000), +}; + +_LIBCPP_FUNC_VIS char* __u64toa(uint64_t __value, char* __buffer); +_LIBCPP_FUNC_VIS char* __u32toa(uint32_t __value, char* __buffer); + +template <typename _Tp, typename = void> +struct _LIBCPP_HIDDEN __traits_base +{ + using type = uint64_t; + +#if !defined(_LIBCPP_COMPILER_MSVC) + static _LIBCPP_INLINE_VISIBILITY int __width(_Tp __v) + { + auto __t = (64 - __builtin_clzll(__v | 1)) * 1233 >> 12; + return __t - (__v < __pow10_64[__t]) + 1; + } +#endif + + static _LIBCPP_INLINE_VISIBILITY char* __convert(_Tp __v, char* __p) + { + return __u64toa(__v, __p); + } + + static _LIBCPP_INLINE_VISIBILITY auto& __pow() { return __pow10_64; } +}; + +template <typename _Tp> +struct _LIBCPP_HIDDEN + __traits_base<_Tp, decltype(void(uint32_t{declval<_Tp>()}))> +{ + using type = uint32_t; + +#if !defined(_LIBCPP_COMPILER_MSVC) + static _LIBCPP_INLINE_VISIBILITY int __width(_Tp __v) + { + auto __t = (32 - __builtin_clz(__v | 1)) * 1233 >> 12; + return __t - (__v < __pow10_32[__t]) + 1; + } +#endif + + static _LIBCPP_INLINE_VISIBILITY char* __convert(_Tp __v, char* __p) + { + return __u32toa(__v, __p); + } + + static _LIBCPP_INLINE_VISIBILITY auto& __pow() { return __pow10_32; } +}; + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY bool +__mul_overflowed(unsigned char __a, _Tp __b, unsigned char& __r) +{ + auto __c = __a * __b; + __r = __c; + return __c > (numeric_limits<unsigned char>::max)(); +} + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY bool +__mul_overflowed(unsigned short __a, _Tp __b, unsigned short& __r) +{ + auto __c = __a * __b; + __r = __c; + return __c > (numeric_limits<unsigned short>::max)(); +} + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY bool +__mul_overflowed(_Tp __a, _Tp __b, _Tp& __r) +{ + static_assert(is_unsigned<_Tp>::value, ""); +#if !defined(_LIBCPP_COMPILER_MSVC) + return __builtin_mul_overflow(__a, __b, &__r); +#else + bool __did = __b && ((numeric_limits<_Tp>::max)() / __b) < __a; + __r = __a * __b; + return __did; +#endif +} + +template <typename _Tp, typename _Up> +inline _LIBCPP_INLINE_VISIBILITY bool +__mul_overflowed(_Tp __a, _Up __b, _Tp& __r) +{ + return __mul_overflowed(__a, static_cast<_Tp>(__b), __r); +} + +template <typename _Tp> +struct _LIBCPP_HIDDEN __traits : __traits_base<_Tp> +{ + static constexpr int digits = numeric_limits<_Tp>::digits10 + 1; + using __traits_base<_Tp>::__pow; + using typename __traits_base<_Tp>::type; + + // precondition: at least one non-zero character available + static _LIBCPP_INLINE_VISIBILITY char const* + 
__read(char const* __p, char const* __ep, type& __a, type& __b) + { + type __cprod[digits]; + int __j = digits - 1; + int __i = digits; + do + { + if (!('0' <= *__p && *__p <= '9')) + break; + __cprod[--__i] = *__p++ - '0'; + } while (__p != __ep && __i != 0); + + __a = __inner_product(__cprod + __i + 1, __cprod + __j, __pow() + 1, + __cprod[__i]); + if (__mul_overflowed(__cprod[__j], __pow()[__j - __i], __b)) + --__p; + return __p; + } + + template <typename _It1, typename _It2, typename _Up> + static _LIBCPP_INLINE_VISIBILITY _Up + __inner_product(_It1 __first1, _It1 __last1, _It2 __first2, _Up __init) + { + for (; __first1 < __last1; ++__first1, ++__first2) + __init = __init + *__first1 * *__first2; + return __init; + } +}; + +} // namespace __itoa + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY _Tp +__complement(_Tp __x) +{ + static_assert(is_unsigned<_Tp>::value, "cast to unsigned first"); + return _Tp(~__x + 1); +} + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY auto +__to_unsigned(_Tp __x) +{ + return static_cast<make_unsigned_t<_Tp>>(__x); +} + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY to_chars_result +__to_chars_itoa(char* __first, char* __last, _Tp __value, true_type) +{ + auto __x = __to_unsigned(__value); + if (__value < 0 && __first != __last) + { + *__first++ = '-'; + __x = __complement(__x); + } + + return __to_chars_itoa(__first, __last, __x, false_type()); +} + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY to_chars_result +__to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) +{ + using __tx = __itoa::__traits<_Tp>; + auto __diff = __last - __first; + +#if !defined(_LIBCPP_COMPILER_MSVC) + if (__tx::digits <= __diff || __tx::__width(__value) <= __diff) + return {__tx::__convert(__value, __first), {}}; + else + return {__last, errc::value_too_large}; +#else + if (__tx::digits <= __diff) + return {__tx::__convert(__value, __first), {}}; + else + { + char __buf[__tx::digits]; + auto __p = __tx::__convert(__value, __buf); + auto __len = __p - __buf; + if (__len <= __diff) + { + memcpy(__first, __buf, __len); + return {__first + __len, {}}; + } + else + return {__last, errc::value_too_large}; + } +#endif +} + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY to_chars_result +__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, + true_type) +{ + auto __x = __to_unsigned(__value); + if (__value < 0 && __first != __last) + { + *__first++ = '-'; + __x = __complement(__x); + } + + return __to_chars_integral(__first, __last, __x, __base, false_type()); +} + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY to_chars_result +__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, + false_type) +{ + if (__base == 10) + return __to_chars_itoa(__first, __last, __value, false_type()); + + auto __p = __last; + while (__p != __first) + { + auto __c = __value % __base; + __value /= __base; + *--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c]; + if (__value == 0) + break; + } + + auto __len = __last - __p; + if (__value != 0 || !__len) + return {__last, errc::value_too_large}; + else + { + memmove(__first, __p, __len); + return {__first + __len, {}}; + } +} + +template <typename _Tp, enable_if_t<is_integral<_Tp>::value, int> = 0> +inline _LIBCPP_INLINE_VISIBILITY to_chars_result +to_chars(char* __first, char* __last, _Tp __value) +{ + return __to_chars_itoa(__first, __last, __value, is_signed<_Tp>()); +} + +template <typename _Tp, enable_if_t<is_integral<_Tp>::value, int> = 0> +inline _LIBCPP_INLINE_VISIBILITY to_chars_result +to_chars(char* __first, char* __last, _Tp __value, int __base) +{ + _LIBCPP_ASSERT(2 <= __base && __base <= 36, "base not in [2, 36]"); + return __to_chars_integral(__first, 
__last, __value, __base, + is_signed<_Tp>()); +} + +template <typename _It, typename _Tp, typename _Fn, typename... _Ts> +inline _LIBCPP_INLINE_VISIBILITY from_chars_result +__sign_combinator(_It __first, _It __last, _Tp& __value, _Fn __f, _Ts... __args) +{ + using __tl = numeric_limits<_Tp>; + decltype(__to_unsigned(__value)) __x; + + bool __neg = (__first != __last && *__first == '-'); + auto __r = __f(__neg ? __first + 1 : __first, __last, __x, __args...); + switch (__r.ec) + { + case errc::invalid_argument: + return {__first, __r.ec}; + case errc::result_out_of_range: + return __r; + default: + break; + } + + if (__neg) + { + if (__x <= __complement(__to_unsigned(__tl::min()))) + { + __x = __complement(__x); + memcpy(&__value, &__x, sizeof(__x)); + return __r; + } + } + else + { + if (__x <= (__tl::max)()) + { + __value = __x; + return __r; + } + } + + return {__r.ptr, errc::result_out_of_range}; +} + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY bool +__in_pattern(_Tp __c) +{ + return '0' <= __c && __c <= '9'; +} + +struct _LIBCPP_HIDDEN __in_pattern_result +{ + bool __ok; + int __val; + + explicit _LIBCPP_INLINE_VISIBILITY operator bool() const { return __ok; } +}; + +template <typename _Tp> +inline _LIBCPP_INLINE_VISIBILITY __in_pattern_result +__in_pattern(_Tp __c, int __base) +{ + if (__base <= 10) + return {'0' <= __c && __c < '0' + __base, __c - '0'}; + else if (__in_pattern(__c)) + return {true, __c - '0'}; + else if ('a' <= __c && __c < 'a' + __base - 10) + return {true, __c - 'a' + 10}; + else + return {'A' <= __c && __c < 'A' + __base - 10, __c - 'A' + 10}; +} + +template <typename _It, typename _Tp, typename _Fn, typename... _Ts> +inline _LIBCPP_INLINE_VISIBILITY from_chars_result +__subject_seq_combinator(_It __first, _It __last, _Tp& __value, _Fn __f, + _Ts... __args) +{ + auto __find_non_zero = [](_It __first, _It __last) { + for (; __first != __last; ++__first) + if (*__first != '0') + break; + return __first; + }; + + auto __p = __find_non_zero(__first, __last); + if (__p == __last || !__in_pattern(*__p, __args...)) + { + if (__p == __first) + return {__first, errc::invalid_argument}; + else + { + __value = 0; + return {__p, {}}; + } + } + + auto __r = __f(__p, __last, __value, __args...); + if (__r.ec == errc::result_out_of_range) + { + for (; __r.ptr != __last; ++__r.ptr) + { + if (!__in_pattern(*__r.ptr, __args...)) + break; + } + } + + return __r; +} + +template <typename _Tp, enable_if_t<is_unsigned<_Tp>::value, int> = 0> +inline _LIBCPP_INLINE_VISIBILITY from_chars_result +__from_chars_atoi(const char* __first, const char* __last, _Tp& __value) +{ + using __tx = __itoa::__traits<_Tp>; + using __output_type = typename __tx::type; + + return __subject_seq_combinator( + __first, __last, __value, + [](const char* __first, const char* __last, + _Tp& __value) -> from_chars_result { + __output_type __a, __b; + auto __p = __tx::__read(__first, __last, __a, __b); + if (__p == __last || !__in_pattern(*__p)) + { + __output_type __m = (numeric_limits<_Tp>::max)(); + if (__m >= __a && __m - __a >= __b) + { + __value = __a + __b; + return {__p, {}}; + } + } + return {__p, errc::result_out_of_range}; + }); +} + +template <typename _Tp, enable_if_t<is_signed<_Tp>::value, int> = 0> +inline _LIBCPP_INLINE_VISIBILITY from_chars_result +__from_chars_atoi(const char* __first, const char* __last, _Tp& __value) +{ + using __t = decltype(__to_unsigned(__value)); + return __sign_combinator(__first, __last, __value, __from_chars_atoi<__t>); +} + +template <typename _Tp, enable_if_t<is_unsigned<_Tp>::value, int> = 0> +inline _LIBCPP_INLINE_VISIBILITY from_chars_result +__from_chars_integral(const char* __first, const char* __last, _Tp& __value, + int __base) +{ + if (__base == 10) + return __from_chars_atoi(__first, __last, __value); + + return 
__subject_seq_combinator( + __first, __last, __value, + [](const char* __p, const char* __last, _Tp& __value, + int __base) -> from_chars_result { + using __tl = numeric_limits<_Tp>; + auto __digits = __tl::digits / log2f(float(__base)); + _Tp __a = __in_pattern(*__p++, __base).__val, __b = 0; + + for (int __i = 1; __p != __last; ++__i, ++__p) + { + if (auto __c = __in_pattern(*__p, __base)) + { + if (__i < __digits - 1) + __a = __a * __base + __c.__val; + else + { + if (!__itoa::__mul_overflowed(__a, __base, __a)) + ++__p; + __b = __c.__val; + break; + } + } + else + break; + } + + if (__p == __last || !__in_pattern(*__p, __base)) + { + if ((__tl::max)() - __a >= __b) + { + __value = __a + __b; + return {__p, {}}; + } + } + return {__p, errc::result_out_of_range}; + }, + __base); +} + +template <typename _Tp, enable_if_t<is_signed<_Tp>::value, int> = 0> +inline _LIBCPP_INLINE_VISIBILITY from_chars_result +__from_chars_integral(const char* __first, const char* __last, _Tp& __value, + int __base) +{ + using __t = decltype(__to_unsigned(__value)); + return __sign_combinator(__first, __last, __value, + __from_chars_integral<__t>, __base); +} + +template <typename _Tp, enable_if_t<is_integral<_Tp>::value, int> = 0> +inline _LIBCPP_INLINE_VISIBILITY from_chars_result +from_chars(const char* __first, const char* __last, _Tp& __value) +{ + return __from_chars_atoi(__first, __last, __value); +} + +template <typename _Tp, enable_if_t<is_integral<_Tp>::value, int> = 0> +inline _LIBCPP_INLINE_VISIBILITY from_chars_result +from_chars(const char* __first, const char* __last, _Tp& __value, int __base) +{ + _LIBCPP_ASSERT(2 <= __base && __base <= 36, "base not in [2, 36]"); + return __from_chars_integral(__first, __last, __value, __base); +} + +#endif // _LIBCPP_STD_VER > 11 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_CHARCONV diff --git a/include/c++/v1/chrono b/include/c++/v1/chrono index 809f773..0691101 100644 --- a/include/c++/v1/chrono +++ b/include/c++/v1/chrono @@ -77,16 +77,18 @@ public: constexpr common_type<duration>::type operator+() const; constexpr common_type<duration>::type operator-() const; - constexpr duration& operator++(); - constexpr duration operator++(int); - constexpr duration& operator--(); - constexpr duration operator--(int); + constexpr duration& operator++(); // constexpr in C++17 + constexpr duration operator++(int); // constexpr in C++17 + constexpr duration& operator--(); // constexpr in C++17 + constexpr duration operator--(int); // constexpr in C++17 - constexpr duration& operator+=(const duration& d); - constexpr duration& operator-=(const duration& d); + constexpr duration& operator+=(const duration& d); // constexpr in C++17 + constexpr duration& operator-=(const duration& d); // constexpr in C++17 - duration& operator*=(const rep& rhs); - duration& operator/=(const rep& rhs); + duration& operator*=(const rep& rhs); // constexpr in C++17 + duration& operator/=(const rep& rhs); // constexpr in C++17 + duration& operator%=(const rep& rhs); // constexpr in C++17 + duration& operator%=(const duration& rhs); // constexpr in C++17 // special values @@ -127,8 +129,8 @@ public: // arithmetic - time_point& operator+=(const duration& d); - time_point& operator-=(const duration& d); + time_point& operator+=(const duration& d); // constexpr in C++17 + time_point& operator-=(const duration& d); // constexpr in C++17 // special values @@ -147,6 +149,11 @@ template namespace chrono { + +template<class T> struct is_clock; // C++20 +template<class T> inline constexpr bool is_clock_v = is_clock<T>::value; // C++20 + + // duration arithmetic template <class Rep1, class Period1, class Rep2, class Period2> constexpr @@ -204,6 +211,8 @@ template template <class ToDuration, class Rep, class Period> constexpr ToDuration round(const duration<Rep, Period>& d); 
// C++17 +// duration I/O is elsewhere + // time_point arithmetic (all constexpr in C++14) template <class Clock, class Duration1, class Rep2, class Period2> time_point<Clock, typename common_type<Duration1, duration<Rep2, Period2>>::type> @@ -251,6 +260,7 @@ template template <class Rep, class Period> constexpr duration<Rep, Period> abs(duration<Rep, Period> d); // C++17 + // Clocks class system_clock { public: @@ -267,6 +277,28 @@ public: static time_point from_time_t(time_t __t) noexcept; }; +template <class Duration> + using sys_time = time_point<system_clock, Duration>; // C++20 +using sys_seconds = sys_time<seconds>; // C++20 +using sys_days = sys_time<days>; // C++20 + +class utc_clock; // C++20 + +template <class Duration> + using utc_time = time_point<utc_clock, Duration>; // C++20 +using utc_seconds = utc_time<seconds>; // C++20 + +class tai_clock; // C++20 + +template <class Duration> + using tai_time = time_point<tai_clock, Duration>; // C++20 +using tai_seconds = tai_time<seconds>; // C++20 + +class file_clock; // C++20 + +template <class Duration> + using file_time = time_point<file_clock, Duration>; // C++20 + class steady_clock { public: @@ -281,8 +313,466 @@ public: typedef steady_clock high_resolution_clock; +// 25.7.8, local time // C++20 +struct local_t {}; +template <class Duration> + using local_time = time_point<local_t, Duration>; +using local_seconds = local_time<seconds>; +using local_days = local_time<days>; + +// 25.7.9, time_point conversions template<class DestClock, class SourceClock> // C++20 +struct clock_time_conversion; + +template<class DestClock, class SourceClock, class Duration> + auto clock_cast(const time_point<SourceClock, Duration>& t); + +// 25.8.2, class last_spec // C++20 +struct last_spec; + +// 25.8.3, class day // C++20 + +class day; +constexpr bool operator==(const day& x, const day& y) noexcept; +constexpr bool operator!=(const day& x, const day& y) noexcept; +constexpr bool operator< (const day& x, const day& y) noexcept; +constexpr bool operator> (const day& x, const day& y) noexcept; +constexpr bool operator<=(const day& x, const day& y) noexcept; +constexpr bool operator>=(const day& x, const day& y) noexcept; +constexpr day operator+(const day& x, const days& y) noexcept; +constexpr day operator+(const days& x, const day& y) noexcept; +constexpr day operator-(const day& x, const days& y) noexcept; +constexpr days operator-(const day& x, const day& y) noexcept; + +// 25.8.4, class month // C++20 +class month; +constexpr bool operator==(const month& x, const month& y) noexcept; +constexpr bool operator!=(const month& x, const month& y) noexcept; +constexpr bool operator< (const month& x, const month& y) noexcept; +constexpr bool operator> (const month& x, const month& y) noexcept; +constexpr bool operator<=(const month& x, const month& y) noexcept; +constexpr bool operator>=(const month& x, const month& y) noexcept; +constexpr month operator+(const month& x, const months& y) noexcept; +constexpr month operator+(const months& x, const month& y) noexcept; +constexpr month operator-(const month& x, const months& y) noexcept; +constexpr months operator-(const month& x, const month& y) noexcept; + +// 25.8.5, class year // C++20 +class year; +constexpr bool operator==(const year& x, const year& y) noexcept; +constexpr bool operator!=(const year& x, const year& y) noexcept; +constexpr bool operator< (const year& x, const year& y) noexcept; +constexpr bool operator> (const year& x, const year& y) noexcept; +constexpr bool operator<=(const year& x, const year& y) noexcept; +constexpr bool operator>=(const year& x, const year& y) noexcept; +constexpr year operator+(const year& x, const years& y) noexcept; +constexpr year operator+(const years& x, const year& y) noexcept; +constexpr year operator-(const year& x, const years& y) noexcept; +constexpr years operator-(const year& x, const year& y) noexcept; + +// 25.8.6, class weekday // C++20 +class weekday; + +constexpr bool operator==(const weekday& x, const weekday& y) noexcept; +constexpr bool operator!=(const weekday& 
x, const weekday& y) noexcept; +constexpr weekday operator+(const weekday& x, const days& y) noexcept; +constexpr weekday operator+(const days& x, const weekday& y) noexcept; +constexpr weekday operator-(const weekday& x, const days& y) noexcept; +constexpr days operator-(const weekday& x, const weekday& y) noexcept; + +// 25.8.7, class weekday_indexed // C++20 + +class weekday_indexed; +constexpr bool operator==(const weekday_indexed& x, const weekday_indexed& y) noexcept; +constexpr bool operator!=(const weekday_indexed& x, const weekday_indexed& y) noexcept; + +// 25.8.8, class weekday_last // C++20 +class weekday_last; + +constexpr bool operator==(const weekday_last& x, const weekday_last& y) noexcept; +constexpr bool operator!=(const weekday_last& x, const weekday_last& y) noexcept; + +// 25.8.9, class month_day // C++20 +class month_day; + +constexpr bool operator==(const month_day& x, const month_day& y) noexcept; +constexpr bool operator!=(const month_day& x, const month_day& y) noexcept; +constexpr bool operator< (const month_day& x, const month_day& y) noexcept; +constexpr bool operator> (const month_day& x, const month_day& y) noexcept; +constexpr bool operator<=(const month_day& x, const month_day& y) noexcept; +constexpr bool operator>=(const month_day& x, const month_day& y) noexcept; + + +// 25.8.10, class month_day_last // C++20 +class month_day_last; + +constexpr bool operator==(const month_day_last& x, const month_day_last& y) noexcept; +constexpr bool operator!=(const month_day_last& x, const month_day_last& y) noexcept; +constexpr bool operator< (const month_day_last& x, const month_day_last& y) noexcept; +constexpr bool operator> (const month_day_last& x, const month_day_last& y) noexcept; +constexpr bool operator<=(const month_day_last& x, const month_day_last& y) noexcept; +constexpr bool operator>=(const month_day_last& x, const month_day_last& y) noexcept; + +// 25.8.11, class month_weekday // C++20 +class month_weekday; + +constexpr bool operator==(const month_weekday& x, const month_weekday& y) noexcept; +constexpr bool operator!=(const month_weekday& x, const month_weekday& y) noexcept; + +// 25.8.12, class month_weekday_last // C++20 +class month_weekday_last; + +constexpr bool operator==(const month_weekday_last& x, const month_weekday_last& y) noexcept; +constexpr bool operator!=(const month_weekday_last& x, const month_weekday_last& y) noexcept; + + +// 25.8.13, class year_month // C++20 +class year_month; + +constexpr bool operator==(const year_month& x, const year_month& y) noexcept; +constexpr bool operator!=(const year_month& x, const year_month& y) noexcept; +constexpr bool operator< (const year_month& x, const year_month& y) noexcept; +constexpr bool operator> (const year_month& x, const year_month& y) noexcept; +constexpr bool operator<=(const year_month& x, const year_month& y) noexcept; +constexpr bool operator>=(const year_month& x, const year_month& y) noexcept; + +constexpr year_month operator+(const year_month& ym, const months& dm) noexcept; +constexpr year_month operator+(const months& dm, const year_month& ym) noexcept; +constexpr year_month operator-(const year_month& ym, const months& dm) noexcept; +constexpr months operator-(const year_month& x, const year_month& y) noexcept; +constexpr year_month operator+(const year_month& ym, const years& dy) noexcept; +constexpr year_month operator+(const years& dy, const year_month& ym) noexcept; +constexpr year_month operator-(const year_month& ym, const years& dy) noexcept; + +// 25.8.14, class 
year_month_day // C++20 +class year_month_day; + +constexpr bool operator==(const year_month_day& x, const year_month_day& y) noexcept; +constexpr bool operator!=(const year_month_day& x, const year_month_day& y) noexcept; +constexpr bool operator< (const year_month_day& x, const year_month_day& y) noexcept; +constexpr bool operator> (const year_month_day& x, const year_month_day& y) noexcept; +constexpr bool operator<=(const year_month_day& x, const year_month_day& y) noexcept; +constexpr bool operator>=(const year_month_day& x, const year_month_day& y) noexcept; + +constexpr year_month_day operator+(const year_month_day& ymd, const months& dm) noexcept; +constexpr year_month_day operator+(const months& dm, const year_month_day& ymd) noexcept; +constexpr year_month_day operator+(const year_month_day& ymd, const years& dy) noexcept; +constexpr year_month_day operator+(const years& dy, const year_month_day& ymd) noexcept; +constexpr year_month_day operator-(const year_month_day& ymd, const months& dm) noexcept; +constexpr year_month_day operator-(const year_month_day& ymd, const years& dy) noexcept; + + +// 25.8.15, class year_month_day_last // C++20 +class year_month_day_last; + +constexpr bool operator==(const year_month_day_last& x, + const year_month_day_last& y) noexcept; +constexpr bool operator!=(const year_month_day_last& x, + const year_month_day_last& y) noexcept; +constexpr bool operator< (const year_month_day_last& x, + const year_month_day_last& y) noexcept; +constexpr bool operator> (const year_month_day_last& x, + const year_month_day_last& y) noexcept; +constexpr bool operator<=(const year_month_day_last& x, + const year_month_day_last& y) noexcept; +constexpr bool operator>=(const year_month_day_last& x, + const year_month_day_last& y) noexcept; + +constexpr year_month_day_last + operator+(const year_month_day_last& ymdl, const months& dm) noexcept; +constexpr year_month_day_last + operator+(const months& dm, const year_month_day_last& ymdl) noexcept; +constexpr year_month_day_last + operator+(const year_month_day_last& ymdl, const years& dy) noexcept; +constexpr year_month_day_last + operator+(const years& dy, const year_month_day_last& ymdl) noexcept; +constexpr year_month_day_last + operator-(const year_month_day_last& ymdl, const months& dm) noexcept; +constexpr year_month_day_last + operator-(const year_month_day_last& ymdl, const years& dy) noexcept; + +// 25.8.16, class year_month_weekday // C++20 +class year_month_weekday; + +constexpr bool operator==(const year_month_weekday& x, + const year_month_weekday& y) noexcept; +constexpr bool operator!=(const year_month_weekday& x, + const year_month_weekday& y) noexcept; + +constexpr year_month_weekday + operator+(const year_month_weekday& ymwd, const months& dm) noexcept; +constexpr year_month_weekday + operator+(const months& dm, const year_month_weekday& ymwd) noexcept; +constexpr year_month_weekday + operator+(const year_month_weekday& ymwd, const years& dy) noexcept; +constexpr year_month_weekday + operator+(const years& dy, const year_month_weekday& ymwd) noexcept; +constexpr year_month_weekday + operator-(const year_month_weekday& ymwd, const months& dm) noexcept; +constexpr year_month_weekday + operator-(const year_month_weekday& ymwd, const years& dy) noexcept; + +// 25.8.17, class year_month_weekday_last // C++20 +class year_month_weekday_last; + +constexpr bool operator==(const year_month_weekday_last& x, + const year_month_weekday_last& y) noexcept; +constexpr bool operator!=(const year_month_weekday_last& x, 
+
+// 25.8.18, civil calendar conventional syntax operators    // C++20
+constexpr year_month
+  operator/(const year& y, const month& m) noexcept;
+constexpr year_month
+  operator/(const year& y, int m) noexcept;
+constexpr month_day
+  operator/(const month& m, const day& d) noexcept;
+constexpr month_day
+  operator/(const month& m, int d) noexcept;
+constexpr month_day
+  operator/(int m, const day& d) noexcept;
+constexpr month_day
+  operator/(const day& d, const month& m) noexcept;
+constexpr month_day
+  operator/(const day& d, int m) noexcept;
+constexpr month_day_last
+  operator/(const month& m, last_spec) noexcept;
+constexpr month_day_last
+  operator/(int m, last_spec) noexcept;
+constexpr month_day_last
+  operator/(last_spec, const month& m) noexcept;
+constexpr month_day_last
+  operator/(last_spec, int m) noexcept;
+constexpr month_weekday
+  operator/(const month& m, const weekday_indexed& wdi) noexcept;
+constexpr month_weekday
+  operator/(int m, const weekday_indexed& wdi) noexcept;
+constexpr month_weekday
+  operator/(const weekday_indexed& wdi, const month& m) noexcept;
+constexpr month_weekday
+  operator/(const weekday_indexed& wdi, int m) noexcept;
+constexpr month_weekday_last
+  operator/(const month& m, const weekday_last& wdl) noexcept;
+constexpr month_weekday_last
+  operator/(int m, const weekday_last& wdl) noexcept;
+constexpr month_weekday_last
+  operator/(const weekday_last& wdl, const month& m) noexcept;
+constexpr month_weekday_last
+  operator/(const weekday_last& wdl, int m) noexcept;
+constexpr year_month_day
+  operator/(const year_month& ym, const day& d) noexcept;
+constexpr year_month_day
+  operator/(const year_month& ym, int d) noexcept;
+constexpr year_month_day
+  operator/(const year& y, const month_day& md) noexcept;
+constexpr year_month_day
+  operator/(int y, const month_day& md) noexcept;
+constexpr year_month_day
+  operator/(const month_day& md, const year& y) noexcept;
+constexpr year_month_day
+  operator/(const month_day& md, int y) noexcept;
+constexpr year_month_day_last
+  operator/(const year_month& ym, last_spec) noexcept;
+constexpr year_month_day_last
+  operator/(const year& y, const month_day_last& mdl) noexcept;
+constexpr year_month_day_last
+  operator/(int y, const month_day_last& mdl) noexcept;
+constexpr year_month_day_last
+  operator/(const month_day_last& mdl, const year& y) noexcept;
+constexpr year_month_day_last
+  operator/(const month_day_last& mdl, int y) noexcept;
+constexpr year_month_weekday
+  operator/(const year_month& ym, const weekday_indexed& wdi) noexcept;
+constexpr year_month_weekday
+  operator/(const year& y, const month_weekday& mwd) noexcept;
+constexpr year_month_weekday
+  operator/(int y, const month_weekday& mwd) noexcept;
+constexpr year_month_weekday
+  operator/(const month_weekday& mwd, const year& y) noexcept;
+constexpr year_month_weekday
+  operator/(const month_weekday& mwd, int y) noexcept;
+constexpr year_month_weekday_last
+  operator/(const year_month& ym, const weekday_last& wdl) noexcept;
+constexpr year_month_weekday_last
+  operator/(const year& y, const month_weekday_last& mwdl) noexcept;
+constexpr year_month_weekday_last
+  operator/(int y, const month_weekday_last& mwdl) noexcept;
+constexpr year_month_weekday_last
+  operator/(const month_weekday_last& mwdl, const year& y) noexcept;
+constexpr year_month_weekday_last
+  operator/(const month_weekday_last& mwdl, int y) noexcept;
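The operator/ overloads let the same date be written in y/m/d, m/d/y, or d/m/y order. A minimal sketch, again assuming C++20 library support:

```cpp
#include <chrono>

int main() {
    using namespace std::chrono;

    // The three orderings all name the same date.
    year_month_day d1 = year{2019}/month{3}/day{1};
    year_month_day d2 = month{3}/day{1}/2019;
    year_month_day d3 = day{1}/month{3}/2019;

    // last_spec picks the last day of the month: 2019-02-28.
    year_month_day_last eom = year{2019}/month{2}/last;

    // weekday_indexed picks the n-th weekday: 4th Thursday of November.
    year_month_weekday tg = year{2019}/month{11}/weekday_indexed{weekday{4}, 4};
    (void)tg;

    return (d1 == d2 && d2 == d3 && eom.day() == day{28}) ? 0 : 1;
}
```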
+
+// 25.9, class template time_of_day    // C++20
+template<class Duration> class time_of_day;
+
+template<> class time_of_day<hours>;
+template<> class time_of_day<minutes>;
+template<> class time_of_day<seconds>;
+template<class Rep, class Period> class time_of_day<duration<Rep, Period>>;
+
+// 25.10.2, time zone database    // C++20
+struct tzdb;
+class tzdb_list;
+
+// 25.10.2.3, time zone database access    // C++20
+const tzdb& get_tzdb();
+tzdb_list& get_tzdb_list();
+const time_zone* locate_zone(string_view tz_name);
+const time_zone* current_zone();
+
+// 25.10.2.4, remote time zone database support    // C++20
+const tzdb& reload_tzdb();
+string remote_version();
+
+// 25.10.3, exception classes    // C++20
+class nonexistent_local_time;
+class ambiguous_local_time;
+
+// 25.10.4, information classes    // C++20
+struct sys_info;
+struct local_info;
+
+// 25.10.5, class time_zone    // C++20
+enum class choose {earliest, latest};
+class time_zone;
+bool operator==(const time_zone& x, const time_zone& y) noexcept;
+bool operator!=(const time_zone& x, const time_zone& y) noexcept;
+bool operator<(const time_zone& x, const time_zone& y) noexcept;
+bool operator>(const time_zone& x, const time_zone& y) noexcept;
+bool operator<=(const time_zone& x, const time_zone& y) noexcept;
+bool operator>=(const time_zone& x, const time_zone& y) noexcept;
+
+// 25.10.6, class template zoned_traits    // C++20
+template<class T> struct zoned_traits;
+
+// 25.10.7, class template zoned_time    // C++20
+template<class Duration> class zoned_time;
+using zoned_seconds = zoned_time<seconds>;
+
+template<class Duration1, class Duration2>
+  bool operator==(const zoned_time<Duration1>& x,
+                  const zoned_time<Duration2>& y);
+template<class Duration1, class Duration2>
+  bool operator!=(const zoned_time<Duration1>& x,
+                  const zoned_time<Duration2>& y);
+
+// 25.10.8, leap second support    // C++20
+class leap;
+
+bool operator==(const leap& x, const leap& y);
+bool operator!=(const leap& x, const leap& y);
+bool operator< (const leap& x, const leap& y);
+bool operator> (const leap& x, const leap& y);
+bool operator<=(const leap& x, const leap& y);
+bool operator>=(const leap& x, const leap& y);
+template<class Duration>
+  bool operator==(const leap& x, const sys_time<Duration>& y);
+template<class Duration>
+  bool operator==(const sys_time<Duration>& x, const leap& y);
+template<class Duration>
+  bool operator!=(const leap& x, const sys_time<Duration>& y);
+template<class Duration>
+  bool operator!=(const sys_time<Duration>& x, const leap& y);
+template<class Duration>
+  bool operator< (const leap& x, const sys_time<Duration>& y);
+template<class Duration>
+  bool operator< (const sys_time<Duration>& x, const leap& y);
+template<class Duration>
+  bool operator> (const leap& x, const sys_time<Duration>& y);
+template<class Duration>
+  bool operator> (const sys_time<Duration>& x, const leap& y);
+template<class Duration>
+  bool operator<=(const leap& x, const sys_time<Duration>& y);
+template<class Duration>
+  bool operator<=(const sys_time<Duration>& x, const leap& y);
+template<class Duration>
+  bool operator>=(const leap& x, const sys_time<Duration>& y);
+template<class Duration>
+  bool operator>=(const sys_time<Duration>& x, const leap& y);
+
+// 25.10.9, class link    // C++20
+class link;
+bool operator==(const link& x, const link& y);
+bool operator!=(const link& x, const link& y);
+bool operator< (const link& x, const link& y);
+bool operator> (const link& x, const link& y);
+bool operator<=(const link& x, const link& y);
+bool operator>=(const link& x, const link& y);
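The time-zone API above is declaration-only in this synopsis; libc++ does not ship an implementation at this point. As a hedged sketch, the intended C++20 usage (the design comes from Howard Hinnant's date/tz library) looks like this, assuming a library that ships the IANA database:

```cpp
#include <chrono>

int main() {
    using namespace std::chrono;

    // Look up a zone in the IANA database; throws if the name is unknown.
    const time_zone* tz = locate_zone("America/New_York");

    // zoned_time pairs a zone with a system_clock time point.
    zoned_time<system_clock::duration> zt{tz, system_clock::now()};

    auto local = zt.get_local_time();   // wall-clock time in that zone
    auto utc   = zt.get_sys_time();     // the same instant on system_clock

    (void)local;
    (void)utc;
    return 0;
}
```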
+
+// 25.11, formatting    // C++20
+template<class charT, class Streamable>
+  basic_string<charT>
+    format(const charT* fmt, const Streamable& s);
+
+template<class charT, class Streamable>
+  basic_string<charT>
+    format(const locale& loc, const charT* fmt, const Streamable& s);
+
+template<class charT, class traits, class Alloc, class Streamable>
+  basic_string<charT, traits, Alloc>
+    format(const basic_string<charT, traits, Alloc>& fmt, const Streamable& s);
+
+template<class charT, class traits, class Alloc, class Streamable>
+  basic_string<charT, traits, Alloc>
+    format(const locale& loc, const basic_string<charT, traits, Alloc>& fmt,
+           const Streamable& s);
+
+// 25.12, parsing    // C++20
+template <class charT, class traits, class Alloc, class Parsable>
+unspecified
+    parse(const basic_string<charT, traits, Alloc>& format, Parsable& tp);
+
+template <class charT, class traits, class Alloc, class Parsable>
+unspecified
+    parse(const basic_string<charT, traits, Alloc>& format, Parsable& tp,
+          basic_string<charT, traits, Alloc>& abbrev);
+
+template <class charT, class traits, class Alloc, class Parsable>
+unspecified
+    parse(const basic_string<charT, traits, Alloc>& format, Parsable& tp,
+          minutes& offset);
+
+template <class charT, class traits, class Alloc, class Parsable>
+unspecified
+    parse(const basic_string<charT, traits, Alloc>& format, Parsable& tp,
+          basic_string<charT, traits, Alloc>& abbrev, minutes& offset);
+
+inline constexpr last_spec                       last{};        // C++20
+inline constexpr chrono::weekday                 Sunday{0};     // C++20
+inline constexpr chrono::weekday                 Monday{1};     // C++20
+inline constexpr chrono::weekday                 Tuesday{2};    // C++20
+inline constexpr chrono::weekday                 Wednesday{3};  // C++20
+inline constexpr chrono::weekday                 Thursday{4};   // C++20
+inline constexpr chrono::weekday                 Friday{5};     // C++20
+inline constexpr chrono::weekday                 Saturday{6};   // C++20
+
+inline constexpr chrono::month                   January{1};    // C++20
+inline constexpr chrono::month                   February{2};   // C++20
+inline constexpr chrono::month                   March{3};      // C++20
+inline constexpr chrono::month                   April{4};      // C++20
+inline constexpr chrono::month                   May{5};        // C++20
+inline constexpr chrono::month                   June{6};       // C++20
+inline constexpr chrono::month                   July{7};       // C++20
+inline constexpr chrono::month                   August{8};     // C++20
+inline constexpr chrono::month                   September{9};  // C++20
+inline constexpr chrono::month                   October{10};   // C++20
+inline constexpr chrono::month                   November{11};  // C++20
+inline constexpr chrono::month                   December{12};  // C++20
 }  // chrono
+inline namespace literals {
+  inline namespace chrono_literals {
 constexpr chrono::hours                                operator ""h(unsigned long long);   // C++14
 constexpr chrono::duration<long double, ratio<3600,1>> operator ""h(long double);          // C++14
 constexpr chrono::minutes                              operator ""min(unsigned long long); // C++14
@@ -295,6 +785,10 @@ constexpr chrono::microseconds                  operator ""us(unsigned l
 constexpr chrono::duration<long double, micro>         operator ""us(long double);         // C++14
 constexpr chrono::nanoseconds                          operator ""ns(unsigned long long);  // C++14
 constexpr chrono::duration<long double, nano>          operator ""ns(long double);         // C++14
+constexpr chrono::day                                  operator ""d(unsigned long long d) noexcept;  // C++20
+constexpr chrono::year                                 operator ""y(unsigned long long y) noexcept;  // C++20
+}  // chrono_literals
+}  // literals
 }  // std
 */
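The new ""d and ""y literals compose with the month constants declared above. A small C++20 sketch (again assuming a library that implements the calendar types):

```cpp
#include <chrono>

int main() {
    using namespace std::chrono;           // month constants, e.g. July
    using namespace std::chrono_literals;  // h/min/us/ns since C++14, d/y in C++20

    auto timeout = 2h + 30min;             // C++14 duration literals

    constexpr year_month_day bastille = 1789y/July/14d;
    static_assert(bastille.month() == July);

    return timeout.count() > 0 ? 0 : 1;
}
```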
@@ -304,6 +798,7 @@ constexpr chrono::duration<long double, nano> operator ""ns(long doubl
 #include <type_traits>
 #include <ratio>
 #include <limits>
+#include <version>

 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
@@ -523,7 +1018,7 @@ class _LIBCPP_TEMPLATE_VIS duration
         typedef ratio<__mul<__n1, __d2, !value>::value,
                       __mul<__n2, __d1, !value>::value> type;
     };
-    
+
 public:
     typedef _Rep rep;
     typedef typename _Period::type period;
@@ -863,8 +1358,8 @@ public:

     // arithmetic

-    _LIBCPP_INLINE_VISIBILITY time_point& operator+=(const duration& __d) {__d_ += __d; return *this;}
-    _LIBCPP_INLINE_VISIBILITY time_point& operator-=(const duration& __d) {__d_ -= __d; return *this;}
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 time_point& operator+=(const duration& __d) {__d_ += __d; return *this;}
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 time_point& operator-=(const duration& __d) {__d_ -= __d; return *this;}

     // special values
@@ -1084,7 +1579,7 @@ typedef system_clock high_resolution_clock;
 #if _LIBCPP_STD_VER > 11
 // Suffixes for duration literals [time.duration.literals]
 inline namespace literals
-{ 
+{
   inline namespace chrono_literals
   {
@@ -1141,7 +1636,7 @@ inline namespace literals
     {
        return chrono::duration<long double, micro> (__us);
    }
-    
+
    constexpr chrono::nanoseconds operator""ns(unsigned long long __ns)
    {
diff --git a/include/c++/v1/cmath b/include/c++/v1/cmath
index 917928a..f5f62ad 100644
--- a/include/c++/v1/cmath
+++ b/include/c++/v1/cmath
@@ -303,6 +303,7 @@ long double truncl(long double x);
 #include <__config>
 #include <math.h>
+#include <version>

 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
@@ -547,7 +548,7 @@ hypot(_A1 __lcpp_x, _A2 __lcpp_y, _A3 __lcpp_z) _NOEXCEPT
 #endif

 template <class _A1>
-_LIBCPP_ALWAYS_INLINE
+_LIBCPP_INLINE_VISIBILITY
 _LIBCPP_CONSTEXPR typename enable_if<is_floating_point<_A1>::value, bool>::type
 __libcpp_isnan_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 {
@@ -559,7 +560,7 @@ __libcpp_isnan_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 }

 template <class _A1>
-_LIBCPP_ALWAYS_INLINE
+_LIBCPP_INLINE_VISIBILITY
 _LIBCPP_CONSTEXPR typename enable_if<!is_floating_point<_A1>::value, bool>::type
 __libcpp_isnan_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 {
@@ -567,7 +568,7 @@ __libcpp_isnan_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 }

 template <class _A1>
-_LIBCPP_ALWAYS_INLINE
+_LIBCPP_INLINE_VISIBILITY
 _LIBCPP_CONSTEXPR typename enable_if<is_floating_point<_A1>::value, bool>::type
 __libcpp_isinf_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 {
@@ -579,7 +580,7 @@ __libcpp_isinf_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 }

 template <class _A1>
-_LIBCPP_ALWAYS_INLINE
+_LIBCPP_INLINE_VISIBILITY
 _LIBCPP_CONSTEXPR typename enable_if<!is_floating_point<_A1>::value, bool>::type
 __libcpp_isinf_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 {
@@ -587,7 +588,7 @@ __libcpp_isinf_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 }

 template <class _A1>
-_LIBCPP_ALWAYS_INLINE
+_LIBCPP_INLINE_VISIBILITY
 _LIBCPP_CONSTEXPR typename enable_if<is_floating_point<_A1>::value, bool>::type
 __libcpp_isfinite_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 {
@@ -599,7 +600,7 @@ __libcpp_isfinite_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 }

 template <class _A1>
-_LIBCPP_ALWAYS_INLINE
+_LIBCPP_INLINE_VISIBILITY
 _LIBCPP_CONSTEXPR typename enable_if<!is_floating_point<_A1>::value, bool>::type
 __libcpp_isfinite_or_builtin(_A1 __lcpp_x) _NOEXCEPT
 {
diff --git a/include/c++/v1/codecvt b/include/c++/v1/codecvt
index 46f56ac..5eb9d15 100644
--- a/include/c++/v1/codecvt
+++ b/include/c++/v1/codecvt
@@ -86,7 +86,7 @@ public:
     typedef char      extern_type;
     typedef mbstate_t state_type;

-    _LIBCPP_ALWAYS_INLINE
+    _LIBCPP_INLINE_VISIBILITY
     explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode,
                             codecvt_mode _Mode)
         : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode),
@@ -121,7 +121,7 @@ public:
     typedef char      extern_type;
     typedef mbstate_t state_type;

-    _LIBCPP_ALWAYS_INLINE
+    _LIBCPP_INLINE_VISIBILITY
     explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode,
                             codecvt_mode _Mode)
         : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode),
@@ -156,7 +156,7 @@ public:
     typedef char      extern_type;
     typedef mbstate_t state_type;

-    _LIBCPP_ALWAYS_INLINE
+    _LIBCPP_INLINE_VISIBILITY
     explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode,
                             codecvt_mode _Mode)
         : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode),
@@ -186,11 +186,11 @@ class _LIBCPP_TEMPLATE_VIS codecvt_utf8
     : public __codecvt_utf8<_Elem>
 {
 public:
-    _LIBCPP_ALWAYS_INLINE
+    _LIBCPP_INLINE_VISIBILITY
     explicit codecvt_utf8(size_t __refs = 0)
         : __codecvt_utf8<_Elem>(__refs, _Maxcode, _Mode) {}

-    _LIBCPP_ALWAYS_INLINE
+    _LIBCPP_INLINE_VISIBILITY
     ~codecvt_utf8() {}
 };

@@ -209,7 +209,7 @@ public:
    typedef
char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -244,7 +244,7 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -279,7 +279,7 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -314,7 +314,7 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -349,7 +349,7 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -384,7 +384,7 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -414,11 +414,11 @@ class _LIBCPP_TEMPLATE_VIS codecvt_utf16 : public __codecvt_utf16<_Elem, _Mode & little_endian> { public: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt_utf16(size_t __refs = 0) : __codecvt_utf16<_Elem, _Mode & little_endian>(__refs, _Maxcode, _Mode) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~codecvt_utf16() {} }; @@ -437,7 +437,7 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -472,7 +472,7 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -507,7 +507,7 @@ public: typedef char extern_type; typedef mbstate_t state_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, codecvt_mode _Mode) : codecvt(__refs), _Maxcode_(_Maxcode), @@ -537,11 +537,11 @@ class _LIBCPP_TEMPLATE_VIS codecvt_utf8_utf16 : public __codecvt_utf8_utf16<_Elem> { public: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit codecvt_utf8_utf16(size_t __refs = 0) : __codecvt_utf8_utf16<_Elem>(__refs, _Maxcode, _Mode) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~codecvt_utf8_utf16() {} }; diff --git a/include/c++/v1/compare b/include/c++/v1/compare index 17fc6ff..07f88f0 100644 --- a/include/c++/v1/compare +++ b/include/c++/v1/compare @@ -88,14 +88,14 @@ public: static const weak_equality equivalent; static const weak_equality nonequivalent; - friend constexpr bool operator==(weak_equality __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator==(_CmpUnspecifiedParam, weak_equality __v) noexcept; - friend constexpr bool operator!=(weak_equality __v, _CmpUnspecifiedParam) 
noexcept; - friend constexpr bool operator!=(_CmpUnspecifiedParam, weak_equality __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(weak_equality __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(_CmpUnspecifiedParam, weak_equality __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(weak_equality __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(_CmpUnspecifiedParam, weak_equality __v) noexcept; #ifndef _LIBCPP_HAS_NO_SPACESHIP_OPERATOR - friend constexpr weak_equality operator<=>(weak_equality __v, _CmpUnspecifiedParam) noexcept; - friend constexpr weak_equality operator<=>(_CmpUnspecifiedParam, weak_equality __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr weak_equality operator<=>(weak_equality __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr weak_equality operator<=>(_CmpUnspecifiedParam, weak_equality __v) noexcept; #endif private: @@ -148,20 +148,20 @@ public: static const strong_equality nonequivalent; // conversion - constexpr operator weak_equality() const noexcept { + _LIBCPP_INLINE_VISIBILITY constexpr operator weak_equality() const noexcept { return __value_ == _EqResult::__zero ? weak_equality::equivalent : weak_equality::nonequivalent; } // comparisons - friend constexpr bool operator==(strong_equality __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator!=(strong_equality __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator==(_CmpUnspecifiedParam, strong_equality __v) noexcept; - friend constexpr bool operator!=(_CmpUnspecifiedParam, strong_equality __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(strong_equality __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(strong_equality __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(_CmpUnspecifiedParam, strong_equality __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(_CmpUnspecifiedParam, strong_equality __v) noexcept; #ifndef _LIBCPP_HAS_NO_SPACESHIP_OPERATOR - friend constexpr strong_equality operator<=>(strong_equality __v, _CmpUnspecifiedParam) noexcept; - friend constexpr strong_equality operator<=>(_CmpUnspecifiedParam, strong_equality __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr strong_equality operator<=>(strong_equality __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr strong_equality operator<=>(_CmpUnspecifiedParam, strong_equality __v) noexcept; #endif private: _EqResult __value_; @@ -235,22 +235,22 @@ public: } // comparisons - friend constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator!=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator< (partial_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator> (partial_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator>=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator==(_CmpUnspecifiedParam, partial_ordering __v) noexcept; - friend constexpr bool operator!=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; - friend constexpr bool operator< (_CmpUnspecifiedParam, partial_ordering __v) 
noexcept; - friend constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; - friend constexpr bool operator> (_CmpUnspecifiedParam, partial_ordering __v) noexcept; - friend constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (partial_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (partial_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (_CmpUnspecifiedParam, partial_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (_CmpUnspecifiedParam, partial_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; #ifndef _LIBCPP_HAS_NO_SPACESHIP_OPERATOR - friend constexpr partial_ordering operator<=>(partial_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr partial_ordering operator<=>(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr partial_ordering operator<=>(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr partial_ordering operator<=>(_CmpUnspecifiedParam, partial_ordering __v) noexcept; #endif private: @@ -351,22 +351,22 @@ public: } // comparisons - friend constexpr bool operator==(weak_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator!=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator< (weak_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator<=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator> (weak_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator>=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator==(_CmpUnspecifiedParam, weak_ordering __v) noexcept; - friend constexpr bool operator!=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; - friend constexpr bool operator< (_CmpUnspecifiedParam, weak_ordering __v) noexcept; - friend constexpr bool operator<=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; - friend constexpr bool operator> (_CmpUnspecifiedParam, weak_ordering __v) noexcept; - friend constexpr bool operator>=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (weak_ordering 
__v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (weak_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (_CmpUnspecifiedParam, weak_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (_CmpUnspecifiedParam, weak_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; #ifndef _LIBCPP_HAS_NO_SPACESHIP_OPERATOR - friend constexpr weak_ordering operator<=>(weak_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr weak_ordering operator<=>(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr weak_ordering operator<=>(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr weak_ordering operator<=>(_CmpUnspecifiedParam, weak_ordering __v) noexcept; #endif private: @@ -477,22 +477,22 @@ public: } // comparisons - friend constexpr bool operator==(strong_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator!=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator< (strong_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator<=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator> (strong_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator>=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr bool operator==(_CmpUnspecifiedParam, strong_ordering __v) noexcept; - friend constexpr bool operator!=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; - friend constexpr bool operator< (_CmpUnspecifiedParam, strong_ordering __v) noexcept; - friend constexpr bool operator<=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; - friend constexpr bool operator> (_CmpUnspecifiedParam, strong_ordering __v) noexcept; - friend constexpr bool operator>=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (strong_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (strong_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator!=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; 
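For orientation, these category types are what a user-defined operator<=> returns instead of bool. A sketch, assuming a C++2a compiler with spaceship support; note that category values themselves only compare against the literal 0, which is what the _CmpUnspecifiedParam machinery above implements:

```cpp
#include <compare>

struct Version {
    int major, minor;

    // Lexicographic comparison; int <=> int yields std::strong_ordering.
    constexpr std::strong_ordering operator<=>(const Version& rhs) const {
        if (auto c = major <=> rhs.major; c != 0) return c;
        return minor <=> rhs.minor;
    }
};

static_assert((Version{1, 2} <=> Version{1, 3}) < 0);

int main() {
    // double <=> double yields partial_ordering, which can be unordered:
    double nan = 0.0 / 0.0;
    auto c = 1.0 <=> nan;
    bool ordered = c < 0 || c == 0 || c > 0;   // all false for NaN
    return ordered ? 1 : 0;
}
```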
+ _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (_CmpUnspecifiedParam, strong_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (_CmpUnspecifiedParam, strong_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; #ifndef _LIBCPP_HAS_NO_SPACESHIP_OPERATOR - friend constexpr strong_ordering operator<=>(strong_ordering __v, _CmpUnspecifiedParam) noexcept; - friend constexpr strong_ordering operator<=>(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr strong_ordering operator<=>(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + _LIBCPP_INLINE_VISIBILITY friend constexpr strong_ordering operator<=>(_CmpUnspecifiedParam, strong_ordering __v) noexcept; #endif private: diff --git a/include/c++/v1/complex b/include/c++/v1/complex index d692ee3..8cf6a94 100644 --- a/include/c++/v1/complex +++ b/include/c++/v1/complex @@ -245,6 +245,7 @@ template #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/include/c++/v1/cstddef b/include/c++/v1/cstddef index adeefda..b4c42b1 100644 --- a/include/c++/v1/cstddef +++ b/include/c++/v1/cstddef @@ -35,6 +35,7 @@ Types: */ #include <__config> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -50,7 +51,7 @@ using ::ptrdiff_t; using ::size_t; #if defined(__CLANG_MAX_ALIGN_T_DEFINED) || defined(_GCC_MAX_ALIGN_T) || \ - defined(__DEFINED_max_align_t) + defined(__DEFINED_max_align_t) || defined(__NetBSD__) // Re-use the compiler's max_align_t where possible. 
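As a reminder of what the cstddef operators below are for: std::byte is a scoped enumeration, so bitwise manipulation only works through this overload set (C++17):

```cpp
#include <cstddef>

int main() {
    std::byte flags{0b0011};

    flags |= std::byte{0b0100};    // set a bit    -> 0b0111
    flags &= ~std::byte{0b0001};   // clear a bit  -> 0b0110

    std::byte masked = flags ^ std::byte{0xFF};    // -> 0b1111'1001

    // No implicit conversion back to an integer type.
    return std::to_integer<int>(masked) == 0b1111'1001 ? 0 : 1;
}
```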
 using ::max_align_t;
 #else
@@ -66,10 +67,10 @@ enum class byte : unsigned char {};

 constexpr byte  operator| (byte  __lhs, byte  __rhs) noexcept
 {
-    return static_cast<byte>(
-      static_cast<unsigned char>(
-         static_cast<unsigned int>(__lhs) | static_cast<unsigned int>(__rhs)
-      ));
+    return static_cast<byte>(
+      static_cast<unsigned char>(
+        static_cast<unsigned int>(__lhs) | static_cast<unsigned int>(__rhs)
+    ));
 }

 constexpr byte& operator|=(byte& __lhs, byte __rhs) noexcept
@@ -77,10 +78,10 @@ constexpr byte& operator|=(byte& __lhs, byte __rhs) noexcept

 constexpr byte  operator& (byte  __lhs, byte  __rhs) noexcept
 {
-    return static_cast<byte>(
-      static_cast<unsigned char>(
-         static_cast<unsigned int>(__lhs) & static_cast<unsigned int>(__rhs)
-      ));
+    return static_cast<byte>(
+      static_cast<unsigned char>(
+        static_cast<unsigned int>(__lhs) & static_cast<unsigned int>(__rhs)
+    ));
 }

 constexpr byte& operator&=(byte& __lhs, byte __rhs) noexcept
@@ -88,13 +89,13 @@ constexpr byte& operator&=(byte& __lhs, byte __rhs) noexcept

 constexpr byte  operator^ (byte  __lhs, byte  __rhs) noexcept
 {
-    return static_cast<byte>(
-      static_cast<unsigned char>(
-         static_cast<unsigned int>(__lhs) ^ static_cast<unsigned int>(__rhs)
-      ));
+    return static_cast<byte>(
+      static_cast<unsigned char>(
+        static_cast<unsigned int>(__lhs) ^ static_cast<unsigned int>(__rhs)
+    ));
 }

-constexpr byte& operator^=(byte& __lhs, byte __rhs) noexcept 
+constexpr byte& operator^=(byte& __lhs, byte __rhs) noexcept
 { return __lhs = __lhs ^ __rhs; }

 constexpr byte  operator~ (byte __b) noexcept
diff --git a/include/c++/v1/cstdlib b/include/c++/v1/cstdlib
index 78c4284..00c604e 100644
--- a/include/c++/v1/cstdlib
+++ b/include/c++/v1/cstdlib
@@ -151,11 +151,11 @@ using ::mbtowc;
 using ::wctomb;
 using ::mbstowcs;
 using ::wcstombs;
-#ifdef _LIBCPP_HAS_QUICK_EXIT
+#if !defined(_LIBCPP_CXX03_LANG) && defined(_LIBCPP_HAS_QUICK_EXIT)
 using ::at_quick_exit;
 using ::quick_exit;
 #endif
-#ifdef _LIBCPP_HAS_C11_FEATURES
+#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_C11_FEATURES)
 using ::aligned_alloc;
 #endif
diff --git a/include/c++/v1/ctime b/include/c++/v1/ctime
index da9e329..8264fe3 100644
--- a/include/c++/v1/ctime
+++ b/include/c++/v1/ctime
@@ -18,7 +18,8 @@ Macros:
     NULL
     CLOCKS_PER_SEC
-
+    TIME_UTC // C++17
+
 namespace std
 {
@@ -28,7 +29,8 @@ Types:
     size_t
     time_t
     tm
-
+    timespec // C++17
+
 clock_t clock();
 double difftime(time_t time1, time_t time0);
 time_t mktime(tm* timeptr);
@@ -39,7 +41,7 @@ tm*    gmtime(const time_t* timer);
 tm* localtime(const time_t* timer);
 size_t strftime(char* restrict s, size_t maxsize, const char* restrict format,
                 const tm* restrict timeptr);
-
+int timespec_get( struct timespec *ts, int base); // C++17
 }  // std
 */
@@ -57,6 +59,9 @@ using ::clock_t;
 using ::size_t;
 using ::time_t;
 using ::tm;
+#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_C11_FEATURES)
+using ::timespec;
+#endif
 using ::clock;
 using ::difftime;
 using ::mktime;
@@ -68,6 +73,9 @@ using ::gmtime;
 using ::localtime;
 #endif
 using ::strftime;
+#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_TIMESPEC_GET)
+using ::timespec_get;
+#endif

 _LIBCPP_END_NAMESPACE_STD
diff --git a/include/c++/v1/deque b/include/c++/v1/deque
index 08cb295..414c7a8 100644
--- a/include/c++/v1/deque
+++ b/include/c++/v1/deque
@@ -128,6 +128,10 @@ public:
     void clear() noexcept;
 };

+template <class InputIterator, class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
+   deque(InputIterator, InputIterator, Allocator = Allocator())
+   -> deque<typename iterator_traits<InputIterator>::value_type, Allocator>;
+
 template <class T, class Allocator>
     bool operator==(const deque<T,Allocator>& x, const deque<T,Allocator>& y);
 template <class T, class Allocator>
@@ -157,6 +161,7 @@ template <class T, class Allocator>
 #include <iterator>
 #include <algorithm>
 #include <stdexcept>
+#include <version>
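With the deduction guide added to the deque synopsis above, class template argument deduction works for iterator-pair construction. A minimal C++17 sketch:

```cpp
#include <deque>
#include <memory>
#include <vector>

int main() {
    std::vector<int> v{1, 2, 3, 4};

    std::deque d1(v.begin(), v.end());       // deduces std::deque<int>
    std::deque d2(v.begin(), v.end(),
                  std::allocator<int>{});    // allocator form, still <int>

    return (d1 == std::deque<int>{1, 2, 3, 4} && d1 == d2) ? 0 : 1;
}
```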
@@ -921,13 +926,14 @@ class __deque_base {
     __deque_base(const __deque_base& __c);
     __deque_base& operator=(const __deque_base& __c);
-protected:
-    typedef _Tp value_type;
+public:
     typedef _Allocator allocator_type;
     typedef allocator_traits<allocator_type> __alloc_traits;
+    typedef typename __alloc_traits::size_type size_type;
+protected:
+    typedef _Tp value_type;
     typedef value_type& reference;
     typedef const value_type& const_reference;
-    typedef typename __alloc_traits::size_type size_type;
     typedef typename __alloc_traits::difference_type difference_type;
     typedef typename __alloc_traits::pointer pointer;
     typedef typename __alloc_traits::const_pointer const_pointer;
@@ -946,6 +952,7 @@ protected:
     typedef __deque_iterator<value_type, const_pointer, const_reference, __const_map_pointer,
                              difference_type> const_iterator;

+protected:
     __map __map_;
     size_type __start_;
     __compressed_pair<size_type, allocator_type> __size_;
@@ -1461,6 +1468,23 @@ private:
     void __move_assign(deque& __c, false_type);
 };

+#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+template<class _InputIterator,
+         class _Alloc = allocator<typename iterator_traits<_InputIterator>::value_type>,
+         class = typename enable_if<__is_allocator<_Alloc>::value, void>::type
+         >
+deque(_InputIterator, _InputIterator)
+  -> deque<typename iterator_traits<_InputIterator>::value_type, _Alloc>;
+
+template<class _InputIterator,
+         class _Alloc,
+         class = typename enable_if<__is_allocator<_Alloc>::value, void>::type
+         >
+deque(_InputIterator, _InputIterator, _Alloc)
+  -> deque<typename iterator_traits<_InputIterator>::value_type, _Alloc>;
+#endif
+
+
 template <class _Tp, class _Allocator>
 deque<_Tp, _Allocator>::deque(size_type __n)
 {
diff --git a/include/c++/v1/exception b/include/c++/v1/exception
index 79bd6ac..7fd9279 100644
--- a/include/c++/v1/exception
+++ b/include/c++/v1/exception
@@ -81,6 +81,7 @@ template <class E> void rethrow_if_nested(const E& e);
 #include <cstddef>
 #include <cstdlib>
 #include <type_traits>
+#include <version>

 #if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME)
 #include <vcruntime_exception.h>
@@ -259,7 +260,7 @@ struct __throw_with_nested;
 template <class _Tp, class _Up>
 struct __throw_with_nested<_Tp, _Up, true> {
-    _LIBCPP_NORETURN static inline _LIBCPP_ALWAYS_INLINE void
+    _LIBCPP_NORETURN static inline _LIBCPP_INLINE_VISIBILITY void
 #ifndef _LIBCPP_CXX03_LANG
     __do_throw(_Tp&& __t)
 #else
@@ -272,7 +273,7 @@ struct __throw_with_nested<_Tp, _Up, true> {
 template <class _Tp, class _Up>
 struct __throw_with_nested<_Tp, _Up, false> {
-    _LIBCPP_NORETURN static inline _LIBCPP_ALWAYS_INLINE void
+    _LIBCPP_NORETURN static inline _LIBCPP_INLINE_VISIBILITY void
 #ifndef _LIBCPP_CXX03_LANG
     __do_throw(_Tp&& __t)
 #else
diff --git a/include/c++/v1/experimental/__config b/include/c++/v1/experimental/__config
index 37f88c1..c6f1776 100644
--- a/include/c++/v1/experimental/__config
+++ b/include/c++/v1/experimental/__config
@@ -52,6 +52,23 @@
 #define _VSTD_CORO _VSTD_EXPERIMENTAL::coroutines_v1

-#define _VSTD_FS ::std::experimental::filesystem::v1
+#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD \
+    _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace parallelism_v2 {
+
+#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD \
+    } _LIBCPP_END_NAMESPACE_EXPERIMENTAL
+
+#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD_ABI \
+    _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD namespace simd_abi {
+
+#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD_ABI \
+    } _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD
+
+// TODO: support more targets
+#if defined(__AVX__)
+#define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 32
+#else
+#define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 16
+#endif

 #endif
diff --git a/include/c++/v1/experimental/coroutine b/include/c++/v1/experimental/coroutine
index 32d55fa..1eb224a 100644
--- a/include/c++/v1/experimental/coroutine
+++ b/include/c++/v1/experimental/coroutine
@@ -283,6 +283,7 @@ public:
   _LIBCPP_CONSTEXPR_AFTER_CXX17 void destroy() const _NOEXCEPT {}

 private:
+  _LIBCPP_INLINE_VISIBILITY
   friend coroutine_handle<noop_coroutine_promise> noop_coroutine() _NOEXCEPT;

   _LIBCPP_INLINE_VISIBILITY coroutine_handle() _NOEXCEPT {
diff --git a/include/c++/v1/experimental/dynarray b/include/c++/v1/experimental/dynarray
index cebfc20..a60c87c 100644
---
a/include/c++/v1/experimental/dynarray +++ b/include/c++/v1/experimental/dynarray @@ -133,7 +133,7 @@ public: private: size_t __size_; value_type * __base_; - _LIBCPP_ALWAYS_INLINE dynarray () noexcept : __size_(0), __base_(nullptr) {} + _LIBCPP_INLINE_VISIBILITY dynarray () noexcept : __size_(0), __base_(nullptr) {} static inline _LIBCPP_INLINE_VISIBILITY value_type* __allocate(size_t __count) { diff --git a/include/c++/v1/experimental/filesystem b/include/c++/v1/experimental/filesystem index a851824..28d8dcf 100644 --- a/include/c++/v1/experimental/filesystem +++ b/include/c++/v1/experimental/filesystem @@ -16,15 +16,15 @@ class path; - void swap(path& lhs, path& rhs) _NOEXCEPT; - size_t hash_value(const path& p) _NOEXCEPT; + void swap(path& lhs, path& rhs) noexcept; + size_t hash_value(const path& p) noexcept; - bool operator==(const path& lhs, const path& rhs) _NOEXCEPT; - bool operator!=(const path& lhs, const path& rhs) _NOEXCEPT; - bool operator< (const path& lhs, const path& rhs) _NOEXCEPT; - bool operator<=(const path& lhs, const path& rhs) _NOEXCEPT; - bool operator> (const path& lhs, const path& rhs) _NOEXCEPT; - bool operator>=(const path& lhs, const path& rhs) _NOEXCEPT; + bool operator==(const path& lhs, const path& rhs) noexcept; + bool operator!=(const path& lhs, const path& rhs) noexcept; + bool operator< (const path& lhs, const path& rhs) noexcept; + bool operator<=(const path& lhs, const path& rhs) noexcept; + bool operator> (const path& lhs, const path& rhs) noexcept; + bool operator>=(const path& lhs, const path& rhs) noexcept; path operator/ (const path& lhs, const path& rhs); @@ -96,88 +96,88 @@ void copy_symlink(const path& existing_symlink, const path& new_symlink); void copy_symlink(const path& existing_symlink, const path& new_symlink, - error_code& ec) _NOEXCEPT; + error_code& ec) noexcept; bool create_directories(const path& p); bool create_directories(const path& p, error_code& ec); bool create_directory(const path& p); - bool create_directory(const path& p, error_code& ec) _NOEXCEPT; + bool create_directory(const path& p, error_code& ec) noexcept; bool create_directory(const path& p, const path& attributes); bool create_directory(const path& p, const path& attributes, - error_code& ec) _NOEXCEPT; + error_code& ec) noexcept; void create_directory_symlink(const path& to, const path& new_symlink); void create_directory_symlink(const path& to, const path& new_symlink, - error_code& ec) _NOEXCEPT; + error_code& ec) noexcept; void create_hard_link(const path& to, const path& new_hard_link); void create_hard_link(const path& to, const path& new_hard_link, - error_code& ec) _NOEXCEPT; + error_code& ec) noexcept; void create_symlink(const path& to, const path& new_symlink); void create_symlink(const path& to, const path& new_symlink, - error_code& ec) _NOEXCEPT; + error_code& ec) noexcept; path current_path(); path current_path(error_code& ec); void current_path(const path& p); - void current_path(const path& p, error_code& ec) _NOEXCEPT; + void current_path(const path& p, error_code& ec) noexcept; - bool exists(file_status s) _NOEXCEPT; + bool exists(file_status s) noexcept; bool exists(const path& p); - bool exists(const path& p, error_code& ec) _NOEXCEPT; + bool exists(const path& p, error_code& ec) noexcept; bool equivalent(const path& p1, const path& p2); - bool equivalent(const path& p1, const path& p2, error_code& ec) _NOEXCEPT; + bool equivalent(const path& p1, const path& p2, error_code& ec) noexcept; uintmax_t file_size(const path& p); - uintmax_t 
file_size(const path& p, error_code& ec) _NOEXCEPT; + uintmax_t file_size(const path& p, error_code& ec) noexcept; uintmax_t hard_link_count(const path& p); - uintmax_t hard_link_count(const path& p, error_code& ec) _NOEXCEPT; + uintmax_t hard_link_count(const path& p, error_code& ec) noexcept; - bool is_block_file(file_status s) _NOEXCEPT; + bool is_block_file(file_status s) noexcept; bool is_block_file(const path& p); - bool is_block_file(const path& p, error_code& ec) _NOEXCEPT; + bool is_block_file(const path& p, error_code& ec) noexcept; - bool is_character_file(file_status s) _NOEXCEPT; + bool is_character_file(file_status s) noexcept; bool is_character_file(const path& p); - bool is_character_file(const path& p, error_code& ec) _NOEXCEPT; + bool is_character_file(const path& p, error_code& ec) noexcept; - bool is_directory(file_status s) _NOEXCEPT; + bool is_directory(file_status s) noexcept; bool is_directory(const path& p); - bool is_directory(const path& p, error_code& ec) _NOEXCEPT; + bool is_directory(const path& p, error_code& ec) noexcept; bool is_empty(const path& p); - bool is_empty(const path& p, error_code& ec) _NOEXCEPT; + bool is_empty(const path& p, error_code& ec) noexcept; - bool is_fifo(file_status s) _NOEXCEPT; + bool is_fifo(file_status s) noexcept; bool is_fifo(const path& p); - bool is_fifo(const path& p, error_code& ec) _NOEXCEPT; + bool is_fifo(const path& p, error_code& ec) noexcept; - bool is_other(file_status s) _NOEXCEPT; + bool is_other(file_status s) noexcept; bool is_other(const path& p); - bool is_other(const path& p, error_code& ec) _NOEXCEPT; + bool is_other(const path& p, error_code& ec) noexcept; - bool is_regular_file(file_status s) _NOEXCEPT; + bool is_regular_file(file_status s) noexcept; bool is_regular_file(const path& p); - bool is_regular_file(const path& p, error_code& ec) _NOEXCEPT; + bool is_regular_file(const path& p, error_code& ec) noexcept; - bool is_socket(file_status s) _NOEXCEPT; + bool is_socket(file_status s) noexcept; bool is_socket(const path& p); - bool is_socket(const path& p, error_code& ec) _NOEXCEPT; + bool is_socket(const path& p, error_code& ec) noexcept; - bool is_symlink(file_status s) _NOEXCEPT; + bool is_symlink(file_status s) noexcept; bool is_symlink(const path& p); - bool is_symlink(const path& p, error_code& ec) _NOEXCEPT; + bool is_symlink(const path& p, error_code& ec) noexcept; file_time_type last_write_time(const path& p); - file_time_type last_write_time(const path& p, error_code& ec) _NOEXCEPT; + file_time_type last_write_time(const path& p, error_code& ec) noexcept; void last_write_time(const path& p, file_time_type new_time); void last_write_time(const path& p, file_time_type new_time, - error_code& ec) _NOEXCEPT; + error_code& ec) noexcept; void permissions(const path& p, perms prms, perm_options opts=perm_options::replace); @@ -197,27 +197,27 @@ path relative(const path& p, const path& base, error_code& ec); bool remove(const path& p); - bool remove(const path& p, error_code& ec) _NOEXCEPT; + bool remove(const path& p, error_code& ec) noexcept; uintmax_t remove_all(const path& p); uintmax_t remove_all(const path& p, error_code& ec); void rename(const path& from, const path& to); - void rename(const path& from, const path& to, error_code& ec) _NOEXCEPT; + void rename(const path& from, const path& to, error_code& ec) noexcept; void resize_file(const path& p, uintmax_t size); - void resize_file(const path& p, uintmax_t size, error_code& ec) _NOEXCEPT; + void resize_file(const path& p, uintmax_t size, 
error_code& ec) noexcept; space_info space(const path& p); - space_info space(const path& p, error_code& ec) _NOEXCEPT; + space_info space(const path& p, error_code& ec) noexcept; file_status status(const path& p); - file_status status(const path& p, error_code& ec) _NOEXCEPT; + file_status status(const path& p, error_code& ec) noexcept; - bool status_known(file_status s) _NOEXCEPT; + bool status_known(file_status s) noexcept; file_status symlink_status(const path& p); - file_status symlink_status(const path& p, error_code& ec) _NOEXCEPT; + file_status symlink_status(const path& p, error_code& ec) noexcept; path temp_directory_path(); path temp_directory_path(error_code& ec); @@ -231,2039 +231,27 @@ */ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include // for quoted -#include - -#include <__debug> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#ifndef _LIBCPP_CXX03_LANG + #define __cpp_lib_experimental_filesystem 201406 _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_FILESYSTEM -typedef chrono::time_point file_time_type; - -struct _LIBCPP_TYPE_VIS space_info -{ - uintmax_t capacity; - uintmax_t free; - uintmax_t available; -}; - -enum class _LIBCPP_ENUM_VIS file_type : signed char -{ - none = 0, - not_found = -1, - regular = 1, - directory = 2, - symlink = 3, - block = 4, - character = 5, - fifo = 6, - socket = 7, - unknown = 8 -}; - -enum class _LIBCPP_ENUM_VIS perms : unsigned -{ - none = 0, - - owner_read = 0400, - owner_write = 0200, - owner_exec = 0100, - owner_all = 0700, - - group_read = 040, - group_write = 020, - group_exec = 010, - group_all = 070, - - others_read = 04, - others_write = 02, - others_exec = 01, - others_all = 07, - - all = 0777, - - set_uid = 04000, - set_gid = 02000, - sticky_bit = 01000, - mask = 07777, - unknown = 0xFFFF, -}; - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR perms operator&(perms _LHS, perms _RHS) -{ return static_cast(static_cast(_LHS) & static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR perms operator|(perms _LHS, perms _RHS) -{ return static_cast(static_cast(_LHS) | static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR perms operator^(perms _LHS, perms _RHS) -{ return static_cast(static_cast(_LHS) ^ static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR perms operator~(perms _LHS) -{ return static_cast(~static_cast(_LHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator&=(perms& _LHS, perms _RHS) -{ return _LHS = _LHS & _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator|=(perms& _LHS, perms _RHS) -{ return _LHS = _LHS | _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator^=(perms& _LHS, perms _RHS) -{ return _LHS = _LHS ^ _RHS; } - -enum class _LIBCPP_ENUM_VIS perm_options : unsigned char { - replace = 1, - add = 2, - remove = 4, - nofollow = 8 -}; - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR perm_options operator&(perm_options _LHS, perm_options _RHS) -{ return static_cast(static_cast(_LHS) & static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR perm_options operator|(perm_options _LHS, perm_options _RHS) -{ return static_cast(static_cast(_LHS) | static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR perm_options operator^(perm_options _LHS, perm_options _RHS) -{ return static_cast(static_cast(_LHS) ^ static_cast(_RHS)); } - 
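The block being removed below (the definitions move to the shared, non-experimental filesystem implementation) is the standard bitmask-type idiom: perms is an enum class, so composing permission masks requires these operator overloads. Usage is unchanged:

```cpp
#include <experimental/filesystem>

namespace fs = std::experimental::filesystem;

int main() {
    // 0644-style mask built from the enumerators.
    fs::perms p = fs::perms::owner_read | fs::perms::owner_write
                | fs::perms::group_read | fs::perms::others_read;

    p &= ~fs::perms::others_read;   // drop a bit again

    bool owner_writable = (p & fs::perms::owner_write) != fs::perms::none;
    return owner_writable ? 0 : 1;
}
```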
-_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR perm_options operator~(perm_options _LHS) -{ return static_cast(~static_cast(_LHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator&=(perm_options& _LHS, perm_options _RHS) -{ return _LHS = _LHS & _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator|=(perm_options& _LHS, perm_options _RHS) -{ return _LHS = _LHS | _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator^=(perm_options& _LHS, perm_options _RHS) -{ return _LHS = _LHS ^ _RHS; } - -enum class _LIBCPP_ENUM_VIS copy_options : unsigned short -{ - none = 0, - skip_existing = 1, - overwrite_existing = 2, - update_existing = 4, - recursive = 8, - copy_symlinks = 16, - skip_symlinks = 32, - directories_only = 64, - create_symlinks = 128, - create_hard_links = 256, - __in_recursive_copy = 512, -}; - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR copy_options operator&(copy_options _LHS, copy_options _RHS) -{ return static_cast(static_cast(_LHS) & static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR copy_options operator|(copy_options _LHS, copy_options _RHS) -{ return static_cast(static_cast(_LHS) | static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR copy_options operator^(copy_options _LHS, copy_options _RHS) -{ return static_cast(static_cast(_LHS) ^ static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR copy_options operator~(copy_options _LHS) -{ return static_cast(~static_cast(_LHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline copy_options& operator&=(copy_options& _LHS, copy_options _RHS) -{ return _LHS = _LHS & _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline copy_options& operator|=(copy_options& _LHS, copy_options _RHS) -{ return _LHS = _LHS | _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline copy_options& operator^=(copy_options& _LHS, copy_options _RHS) -{ return _LHS = _LHS ^ _RHS; } - - -enum class _LIBCPP_ENUM_VIS directory_options : unsigned char -{ - none = 0, - follow_directory_symlink = 1, - skip_permission_denied = 2 -}; - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR directory_options operator&(directory_options _LHS, directory_options _RHS) -{ return static_cast(static_cast(_LHS) & static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR directory_options operator|(directory_options _LHS, directory_options _RHS) -{ return static_cast(static_cast(_LHS) | static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR directory_options operator^(directory_options _LHS, directory_options _RHS) -{ return static_cast(static_cast(_LHS) ^ static_cast(_RHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline _LIBCPP_CONSTEXPR directory_options operator~(directory_options _LHS) -{ return static_cast(~static_cast(_LHS)); } - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator&=(directory_options& _LHS, directory_options _RHS) -{ return _LHS = _LHS & _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator|=(directory_options& _LHS, directory_options _RHS) -{ return _LHS = _LHS | _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator^=(directory_options& _LHS, directory_options _RHS) -{ return _LHS = _LHS ^ _RHS; } - - -class _LIBCPP_TYPE_VIS file_status -{ -public: - // constructors - _LIBCPP_INLINE_VISIBILITY - file_status() _NOEXCEPT : file_status(file_type::none) {} - _LIBCPP_INLINE_VISIBILITY - explicit file_status(file_type __ft, - perms __prms = perms::unknown) _NOEXCEPT - : __ft_(__ft), 
__prms_(__prms)
-    {}
-
-    file_status(const file_status&) _NOEXCEPT = default;
-    file_status(file_status&&) _NOEXCEPT = default;
-
-    _LIBCPP_INLINE_VISIBILITY
-    ~file_status() {}
-
-    file_status& operator=(const file_status&) _NOEXCEPT = default;
-    file_status& operator=(file_status&&) _NOEXCEPT = default;
-
-    // observers
-    _LIBCPP_ALWAYS_INLINE
-    file_type type() const _NOEXCEPT {
-        return __ft_;
-    }
-
-    _LIBCPP_ALWAYS_INLINE
-    perms permissions() const _NOEXCEPT {
-        return __prms_;
-    }
-
-    // modifiers
-    _LIBCPP_ALWAYS_INLINE
-    void type(file_type __ft) _NOEXCEPT {
-        __ft_ = __ft;
-    }
-
-    _LIBCPP_ALWAYS_INLINE
-    void permissions(perms __p) _NOEXCEPT {
-        __prms_ = __p;
-    }
-private:
-    file_type __ft_;
-    perms __prms_;
-};
-
-class _LIBCPP_TYPE_VIS directory_entry;
-
-template <class _Tp> struct __can_convert_char {
-    static const bool value = false;
-};
-template <class _Tp> struct __can_convert_char<const _Tp>
-    : public __can_convert_char<_Tp> {
-};
-template <> struct __can_convert_char<char> {
-    static const bool value = true;
-    using __char_type = char;
-};
-template <> struct __can_convert_char<wchar_t> {
-    static const bool value = true;
-    using __char_type = wchar_t;
-};
-template <> struct __can_convert_char<char16_t> {
-    static const bool value = true;
-    using __char_type = char16_t;
-};
-template <> struct __can_convert_char<char32_t> {
-    static const bool value = true;
-    using __char_type = char32_t;
-};
-
-template <class _ECharT>
-typename enable_if<__can_convert_char<_ECharT>::value, bool>::type
-__is_separator(_ECharT __e) {
-    return __e == _ECharT('/');
-};
-
-struct _NullSentinal {};
-
-template <class _Tp>
-using _Void = void;
-
-template <class _Tp, class = void>
-struct __is_pathable_string : public false_type {};
-
-template <class _ECharT, class _Traits, class _Alloc>
-struct __is_pathable_string<basic_string<_ECharT, _Traits, _Alloc>,
-                            _Void<typename __can_convert_char<_ECharT>::__char_type>>
-: public __can_convert_char<_ECharT>
-{
-    using _Str = basic_string<_ECharT, _Traits, _Alloc>;
-    using _Base = __can_convert_char<_ECharT>;
-    static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); }
-    static _ECharT const* __range_end(_Str const& __s) { return __s.data() + __s.length(); }
-    static _ECharT __first_or_null(_Str const& __s) {
-        return __s.empty() ? _ECharT{} : __s[0];
-    }
-};
-
-
-template <class _ECharT, class _Traits>
-struct __is_pathable_string<basic_string_view<_ECharT, _Traits>,
-                            _Void<typename __can_convert_char<_ECharT>::__char_type>>
-: public __can_convert_char<_ECharT>
-{
-    using _Str = basic_string_view<_ECharT, _Traits>;
-    using _Base = __can_convert_char<_ECharT>;
-    static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); }
-    static _ECharT const* __range_end(_Str const& __s) { return __s.data() + __s.length(); }
-    static _ECharT __first_or_null(_Str const& __s) {
-        return __s.empty() ? _ECharT{} : __s[0];
-    }
-};
-
-template <class _Source,
-          class _DS = typename decay<_Source>::type,
-          class _UnqualPtrType = typename remove_const<
-              typename remove_pointer<_DS>::type>::type,
-          bool _IsCharPtr = is_pointer<_DS>::value &&
-                            __can_convert_char<_UnqualPtrType>::value
-          >
-struct __is_pathable_char_array : false_type {};
-
-template <class _Source, class _ECharT, class _UPtr>
-struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true>
-    : __can_convert_char<typename remove_const<_ECharT>::type>
-{
-    using _Base = __can_convert_char<typename remove_const<_ECharT>::type>;
-
-    static _ECharT const* __range_begin(const _ECharT* __b) { return __b; }
-    static _ECharT const* __range_end(const _ECharT* __b)
-    {
-        using _Iter = const _ECharT*;
-        const _ECharT __sentinal = _ECharT{};
-        _Iter __e = __b;
-        for (; *__e != __sentinal; ++__e)
-            ;
-        return __e;
-    }
-
-    static _ECharT __first_or_null(const _ECharT* __b) { return *__b; }
-};
-
-template <class _Iter, bool _IsIt = __is_input_iterator<_Iter>::value, class = void>
-struct __is_pathable_iter : false_type {};
-
-template <class _Iter>
-struct __is_pathable_iter<_Iter, true,
-        _Void<typename __can_convert_char<
-            typename iterator_traits<_Iter>::value_type>::__char_type>>
-    : __can_convert_char<typename iterator_traits<_Iter>::value_type>
-{
-    using _ECharT = typename iterator_traits<_Iter>::value_type;
-    using _Base = __can_convert_char<_ECharT>;
-
-    static _Iter __range_begin(_Iter __b) { return __b; }
-    static _NullSentinal __range_end(_Iter) { return _NullSentinal{}; }
-
-    static _ECharT __first_or_null(_Iter __b) { return *__b; }
-};
-
-template <class _Tp, bool _IsStringT = __is_pathable_string<_Tp>::value,
-          bool _IsCharIterT = __is_pathable_char_array<_Tp>::value,
-          bool _IsIterT = !_IsCharIterT && __is_pathable_iter<_Tp>::value
-          >
-struct __is_pathable : false_type {
-    static_assert(!_IsStringT && !_IsCharIterT && !_IsIterT, "Must all be false");
-};
-
-template <class _Tp>
-struct __is_pathable<_Tp, true, false, false> : __is_pathable_string<_Tp> {};
-
-
-template <class _Tp>
-struct __is_pathable<_Tp, false, true, false> : __is_pathable_char_array<_Tp> {};
-
-
-template <class _Tp>
-struct __is_pathable<_Tp, false, false, true> : __is_pathable_iter<_Tp> {};
-
-
-template <class _ECharT>
-struct _PathCVT {
-    static_assert(__can_convert_char<_ECharT>::value, "Char type not convertible");
-
-    typedef __narrow_to_utf8<sizeof(_ECharT)*__CHAR_BIT__> _Narrower;
-
-    static void __append_range(string& __dest, _ECharT const* __b, _ECharT const* __e) {
-        _Narrower()(back_inserter(__dest), __b, __e);
-    }
-
-    template <class _Iter>
-    static void __append_range(string& __dest, _Iter __b, _Iter __e) {
-        static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload");
-        if (__b == __e) return;
-        basic_string<_ECharT> __tmp(__b, __e);
-        _Narrower()(back_inserter(__dest), __tmp.data(),
-                    __tmp.data() + __tmp.length());
-    }
-
-    template <class _Iter>
-    static void __append_range(string& __dest, _Iter __b, _NullSentinal) {
-        static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload");
-        const _ECharT __sentinal = _ECharT{};
-        if (*__b == __sentinal) return;
-        basic_string<_ECharT> __tmp;
-        for (; *__b != __sentinal; ++__b)
-            __tmp.push_back(*__b);
-        _Narrower()(back_inserter(__dest), __tmp.data(),
-                    __tmp.data() + __tmp.length());
-    }
-
-    template <class _Source>
-    static void __append_source(string& __dest, _Source const& __s)
-    {
-        using _Traits = __is_pathable<_Source>;
-        __append_range(__dest, _Traits::__range_begin(__s), _Traits::__range_end(__s));
-    }
-};
-
-template <>
-struct _PathCVT<char> {
-
-    template <class _Iter>
-    static typename enable_if<
-        __is_exactly_input_iterator<_Iter>::value
-    >::type __append_range(string& __dest, _Iter __b, _Iter __e) {
-        for (; __b != __e; ++__b)
-            __dest.push_back(*__b);
-    }
-
-    template <class _Iter>
-    static typename enable_if<
-        __is_forward_iterator<_Iter>::value
-    >::type __append_range(string& __dest, _Iter __b, _Iter __e) {
-        __dest.__append_forward_unsafe(__b, __e);
-    }
-
-    template <class _Iter>
-    static void __append_range(string& __dest, _Iter __b, _NullSentinal) {
-        const char __sentinal = char{};
-        for (; *__b != __sentinal; ++__b)
-            __dest.push_back(*__b);
-    }
-
-    template <class _Source>
-    static void __append_source(string& __dest, _Source const& __s)
-    {
-        using _Traits = __is_pathable<_Source>;
-        __append_range(__dest, _Traits::__range_begin(__s),
-                       _Traits::__range_end(__s));
-    }
-};
-
-
-class _LIBCPP_TYPE_VIS path
-{
-    template <class _SourceOrIter, class _Tp = path&>
-    using _EnableIfPathable = typename
-        enable_if<__is_pathable<_SourceOrIter>::value, _Tp>::type;
-
-    template <class _Tp>
-    using _SourceChar = typename __is_pathable<_Tp>::__char_type;
-
-    template <class _Tp>
-    using _SourceCVT = _PathCVT<_SourceChar<_Tp>>;
-
-public:
-    typedef char value_type;
-    typedef basic_string<value_type> string_type;
-    typedef _VSTD::string_view __string_view;
-    static _LIBCPP_CONSTEXPR value_type preferred_separator = '/';
-
-    enum class _LIBCPP_ENUM_VIS format : unsigned char {
-        auto_format,
-        native_format,
-        generic_format
-    };
-
-    // constructors and destructor
-    _LIBCPP_INLINE_VISIBILITY path() _NOEXCEPT {}
-    _LIBCPP_INLINE_VISIBILITY path(const path& __p) : __pn_(__p.__pn_) {}
-    _LIBCPP_INLINE_VISIBILITY path(path&& __p) _NOEXCEPT : __pn_(_VSTD::move(__p.__pn_)) {}
-
-    _LIBCPP_INLINE_VISIBILITY
-    path(string_type&& __s, format = format::auto_format) _NOEXCEPT
-        : __pn_(_VSTD::move(__s)) {}
-
-    template <
-        class _Source,
-        class = _EnableIfPathable<_Source, void>
-        >
-    path(const _Source& __src, format = format::auto_format) {
-        _SourceCVT<_Source>::__append_source(__pn_, __src);
-    }
-
-    template <class _InputIt>
-    path(_InputIt __first, _InputIt __last, format = format::auto_format) {
-        typedef typename iterator_traits<_InputIt>::value_type _ItVal;
-        _PathCVT<_ItVal>::__append_range(__pn_, __first, __last);
-    }
-
-    // TODO Implement locale conversions.
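// Illustrative sketch, not part of the upstream header: the converting
// constructors above accept any "pathable" source -- a basic_string, a
// basic_string_view, a NUL-terminated character pointer/array, or an input
// iterator range -- and _PathCVT narrows it into the native (char) encoding.
// Assuming a conforming <experimental/filesystem>:
//
//     #include <experimental/filesystem>
//     #include <string>
//     namespace fs = std::experimental::filesystem;
//
//     std::string  s  = "usr/lib";
//     std::wstring ws = L"usr/lib";
//     const char*  cs = "usr/lib";
//
//     fs::path p1(s);                  // from basic_string
//     fs::path p2(ws);                 // from basic_string<wchar_t>, narrowed to UTF-8
//     fs::path p3(cs);                 // from a NUL-terminated char array
//     fs::path p4(s.begin(), s.end()); // from an iterator range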
- template - > - path(const _Source& __src, const locale& __loc, - format = format::auto_format); - template - path(_InputIt __first, _InputIt _last, const locale& __loc, - format = format::auto_format); - - _LIBCPP_INLINE_VISIBILITY - ~path() = default; - - // assignments - _LIBCPP_INLINE_VISIBILITY - path& operator=(const path& __p) { - __pn_ = __p.__pn_; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator=(path&& __p) _NOEXCEPT { - __pn_ = _VSTD::move(__p.__pn_); - return *this; - } - - template - _LIBCPP_INLINE_VISIBILITY - path& operator=(string_type&& __s) _NOEXCEPT { - __pn_ = _VSTD::move(__s); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& assign(string_type&& __s) _NOEXCEPT { - __pn_ = _VSTD::move(__s); - return *this; - } - - template - _LIBCPP_INLINE_VISIBILITY - _EnableIfPathable<_Source> - operator=(const _Source& __src) - { return this->assign(__src); } - - - template - _EnableIfPathable<_Source> - assign(const _Source& __src) { - __pn_.clear(); - _SourceCVT<_Source>::__append_source(__pn_, __src); - return *this; - } - - template - path& assign(_InputIt __first, _InputIt __last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - __pn_.clear(); - _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); - return *this; - } - -private: - template - static bool __source_is_absolute(_ECharT __first_or_null) { - return __is_separator(__first_or_null); - } - -public: - // appends - path& operator/=(const path& __p) { - if (__p.is_absolute()) { - __pn_ = __p.__pn_; - return *this; - } - if (has_filename()) - __pn_ += preferred_separator; - __pn_ += __p.native(); - return *this; - } - - // FIXME: Use _LIBCPP_DIAGNOSE_WARNING to produce a diagnostic when __src - // is known at compile time to be "/' since the user almost certainly intended - // to append a separator instead of overwriting the path with "/" - template - _LIBCPP_INLINE_VISIBILITY - _EnableIfPathable<_Source> - operator/=(const _Source& __src) { - return this->append(__src); - } - - template - _EnableIfPathable<_Source> - append(const _Source& __src) { - using _Traits = __is_pathable<_Source>; - using _CVT = _PathCVT<_SourceChar<_Source>>; - if (__source_is_absolute(_Traits::__first_or_null(__src))) - __pn_.clear(); - else if (has_filename()) - __pn_ += preferred_separator; - _CVT::__append_source(__pn_, __src); - return *this; - } - - template - path& append(_InputIt __first, _InputIt __last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - static_assert(__can_convert_char<_ItVal>::value, "Must convertible"); - using _CVT = _PathCVT<_ItVal>; - if (__first != __last && __source_is_absolute(*__first)) - __pn_.clear(); - else if (has_filename()) - __pn_ += preferred_separator; - _CVT::__append_range(__pn_, __first, __last); - return *this; - } - - // concatenation - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const path& __x) { - __pn_ += __x.__pn_; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const string_type& __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(__string_view __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const value_type* __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(value_type __x) { - __pn_ += __x; - return *this; - } - - template - typename enable_if<__can_convert_char<_ECharT>::value, path&>::type - operator+=(_ECharT __x) - { - basic_string<_ECharT> __tmp; - __tmp += __x; - 
_PathCVT<_ECharT>::__append_source(__pn_, __tmp); - return *this; - } - - template - _EnableIfPathable<_Source> - operator+=(const _Source& __x) { - return this->concat(__x); - } - - template - _EnableIfPathable<_Source> - concat(const _Source& __x) { - _SourceCVT<_Source>::__append_source(__pn_, __x); - return *this; - } - - template - path& concat(_InputIt __first, _InputIt __last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); - return *this; - } - - // modifiers - _LIBCPP_INLINE_VISIBILITY - void clear() _NOEXCEPT { - __pn_.clear(); - } - - path& make_preferred() { return *this; } - - _LIBCPP_INLINE_VISIBILITY - path& remove_filename() { - auto __fname = __filename(); - if (!__fname.empty()) - __pn_.erase(__fname.data() - __pn_.data()); - return *this; - } - - path& replace_filename(const path& __replacement) { - remove_filename(); - return (*this /= __replacement); - } - - path& replace_extension(const path& __replacement = path()); - - _LIBCPP_INLINE_VISIBILITY - void swap(path& __rhs) _NOEXCEPT { - __pn_.swap(__rhs.__pn_); - } - - // private helper to allow reserving memory in the path - _LIBCPP_INLINE_VISIBILITY - void __reserve(size_t __s) { __pn_.reserve(__s); } - - // native format observers - _LIBCPP_INLINE_VISIBILITY - const string_type& native() const _NOEXCEPT { - return __pn_; - } - - _LIBCPP_INLINE_VISIBILITY - const value_type* c_str() const _NOEXCEPT { return __pn_.c_str(); } - - _LIBCPP_INLINE_VISIBILITY operator string_type() const { return __pn_; } - - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - string(const _Allocator& __a = _Allocator()) const { - using _CVT = __widen_from_utf8; - using _Str = basic_string<_ECharT, _Traits, _Allocator>; - _Str __s(__a); - __s.reserve(__pn_.size()); - _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size()); - return __s; - } - - _LIBCPP_INLINE_VISIBILITY std::string string() const { return __pn_; } - _LIBCPP_INLINE_VISIBILITY std::wstring wstring() const { return string(); } - _LIBCPP_INLINE_VISIBILITY std::string u8string() const { return __pn_; } - _LIBCPP_INLINE_VISIBILITY std::u16string u16string() const { return string(); } - _LIBCPP_INLINE_VISIBILITY std::u32string u32string() const { return string(); } - - // generic format observers - template , - class _Allocator = allocator<_ECharT> - > - basic_string<_ECharT, _Traits, _Allocator> - generic_string(const _Allocator& __a = _Allocator()) const { - return string<_ECharT, _Traits, _Allocator>(__a); - } - - std::string generic_string() const { return __pn_; } - std::wstring generic_wstring() const { return string(); } - std::string generic_u8string() const { return __pn_; } - std::u16string generic_u16string() const { return string(); } - std::u32string generic_u32string() const { return string(); } - -private: - int __compare(__string_view) const; - __string_view __root_name() const; - __string_view __root_directory() const; - __string_view __root_path_raw() const; - __string_view __relative_path() const; - __string_view __parent_path() const; - __string_view __filename() const; - __string_view __stem() const; - __string_view __extension() const; - -public: - // compare - _LIBCPP_INLINE_VISIBILITY int compare(const path& __p) const _NOEXCEPT { return __compare(__p.__pn_);} - _LIBCPP_INLINE_VISIBILITY int compare(const string_type& __s) const { return __compare(__s); } - _LIBCPP_INLINE_VISIBILITY int compare(__string_view __s) const 
{ return __compare(__s); } - _LIBCPP_INLINE_VISIBILITY int compare(const value_type* __s) const { return __compare(__s); } - - // decomposition - _LIBCPP_INLINE_VISIBILITY path root_name() const { return string_type(__root_name()); } - _LIBCPP_INLINE_VISIBILITY path root_directory() const { return string_type(__root_directory()); } - _LIBCPP_INLINE_VISIBILITY path root_path() const { return root_name().append(string_type(__root_directory())); } - _LIBCPP_INLINE_VISIBILITY path relative_path() const { return string_type(__relative_path()); } - _LIBCPP_INLINE_VISIBILITY path parent_path() const { return string_type(__parent_path()); } - _LIBCPP_INLINE_VISIBILITY path filename() const { return string_type(__filename()); } - _LIBCPP_INLINE_VISIBILITY path stem() const { return string_type(__stem());} - _LIBCPP_INLINE_VISIBILITY path extension() const { return string_type(__extension()); } - - // query - _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY - bool empty() const _NOEXCEPT { return __pn_.empty(); } - - _LIBCPP_INLINE_VISIBILITY bool has_root_name() const { return !__root_name().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_root_directory() const { return !__root_directory().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_root_path() const { return !__root_path_raw().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_relative_path() const { return !__relative_path().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_parent_path() const { return !__parent_path().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_filename() const { return !__filename().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_stem() const { return !__stem().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_extension() const { return !__extension().empty(); } - - _LIBCPP_INLINE_VISIBILITY bool is_absolute() const { return has_root_directory(); } - _LIBCPP_INLINE_VISIBILITY bool is_relative() const { return !is_absolute(); } - - // relative paths - path lexically_normal() const; - path lexically_relative(const path& __base) const; - - _LIBCPP_INLINE_VISIBILITY path lexically_proximate(const path& __base) const { - path __result = this->lexically_relative(__base); - if (__result.native().empty()) - return *this; - return __result; - } - - // iterators - class _LIBCPP_TYPE_VIS iterator; - typedef iterator const_iterator; - - iterator begin() const; - iterator end() const; - - - template - _LIBCPP_INLINE_VISIBILITY - friend typename enable_if::value && - is_same<_Traits, char_traits>::value, - basic_ostream<_CharT, _Traits>& - >::type - operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { - __os << std::__quoted(__p.native()); - return __os; - } - - template - _LIBCPP_INLINE_VISIBILITY - friend typename enable_if::value || - !is_same<_Traits, char_traits>::value, - basic_ostream<_CharT, _Traits>& - >::type - operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { - __os << std::__quoted(__p.string<_CharT, _Traits>()); - return __os; - } - - template - _LIBCPP_INLINE_VISIBILITY - friend basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, path& __p) - { - basic_string<_CharT, _Traits> __tmp; - __is >> __quoted(__tmp); - __p = __tmp; - return __is; - } - -private: - inline _LIBCPP_INLINE_VISIBILITY - path& __assign_view(__string_view const& __s) noexcept { __pn_ = string_type(__s); return *this; } - string_type __pn_; -}; - -inline _LIBCPP_ALWAYS_INLINE -void swap(path& __lhs, path& __rhs) _NOEXCEPT { - __lhs.swap(__rhs); -} - -_LIBCPP_FUNC_VIS -size_t 
hash_value(const path& __p) _NOEXCEPT; - -inline _LIBCPP_INLINE_VISIBILITY -bool operator==(const path& __lhs, const path& __rhs) _NOEXCEPT -{ return __lhs.compare(__rhs) == 0; } - -inline _LIBCPP_INLINE_VISIBILITY -bool operator!=(const path& __lhs, const path& __rhs) _NOEXCEPT -{ return __lhs.compare(__rhs) != 0; } - -inline _LIBCPP_INLINE_VISIBILITY -bool operator<(const path& __lhs, const path& __rhs) _NOEXCEPT -{ return __lhs.compare(__rhs) < 0; } - -inline _LIBCPP_INLINE_VISIBILITY -bool operator<=(const path& __lhs, const path& __rhs) _NOEXCEPT -{ return __lhs.compare(__rhs) <= 0; } - -inline _LIBCPP_INLINE_VISIBILITY -bool operator>(const path& __lhs, const path& __rhs) _NOEXCEPT -{ return __lhs.compare(__rhs) > 0; } - -inline _LIBCPP_INLINE_VISIBILITY -bool operator>=(const path& __lhs, const path& __rhs) _NOEXCEPT -{ return __lhs.compare(__rhs) >= 0; } - -inline _LIBCPP_INLINE_VISIBILITY -path operator/(const path& __lhs, const path& __rhs) { - return path(__lhs) /= __rhs; -} - -template -_LIBCPP_INLINE_VISIBILITY -typename enable_if<__is_pathable<_Source>::value, path>::type -u8path(const _Source& __s){ - static_assert(is_same::__char_type, char>::value, - "u8path(Source const&) requires Source have a character type of type 'char'"); - return path(__s); -} - -template -_LIBCPP_INLINE_VISIBILITY -typename enable_if<__is_pathable<_InputIt>::value, path>::type -u8path(_InputIt __f, _InputIt __l) { - static_assert(is_same::__char_type, char>::value, - "u8path(Iter, Iter) requires Iter have a value_type of type 'char'"); - return path(__f, __l); -} - -class _LIBCPP_TYPE_VIS path::iterator -{ -public: - typedef bidirectional_iterator_tag iterator_category; - - typedef path value_type; - typedef std::ptrdiff_t difference_type; - typedef const path* pointer; - typedef const path& reference; - - typedef void __stashing_iterator_tag; // See reverse_iterator and __is_stashing_iterator -public: - _LIBCPP_INLINE_VISIBILITY - iterator() : __stashed_elem_(), __path_ptr_(nullptr), - __entry_(), __state_(__singular) {} - - iterator(const iterator&) = default; - ~iterator() = default; - - iterator& operator=(const iterator&) = default; - - _LIBCPP_INLINE_VISIBILITY - reference operator*() const { - return __stashed_elem_; - } - - _LIBCPP_INLINE_VISIBILITY - pointer operator->() const { - return &__stashed_elem_; - } - - _LIBCPP_INLINE_VISIBILITY - iterator& operator++() { - _LIBCPP_ASSERT(__state_ != __singular, - "attempting to increment a singular iterator"); - _LIBCPP_ASSERT(__state_ != __at_end, - "attempting to increment the end iterator"); - return __increment(); - } - - _LIBCPP_INLINE_VISIBILITY - iterator operator++(int) { - iterator __it(*this); - this->operator++(); - return __it; - } - - _LIBCPP_INLINE_VISIBILITY - iterator& operator--() { - _LIBCPP_ASSERT(__state_ != __singular, - "attempting to decrement a singular iterator"); - _LIBCPP_ASSERT(__entry_.data() != __path_ptr_->native().data(), - "attempting to decrement the begin iterator"); - return __decrement(); - } - - _LIBCPP_INLINE_VISIBILITY - iterator operator--(int) { - iterator __it(*this); - this->operator--(); - return __it; - } - -private: - friend class path; - - static constexpr unsigned char __singular = 0; - static constexpr unsigned char __at_end = 6; - - inline _LIBCPP_INLINE_VISIBILITY - friend bool operator==(const iterator&, const iterator&); - - iterator& __increment(); - iterator& __decrement(); - - path __stashed_elem_; - const path* __path_ptr_; - path::__string_view __entry_; - unsigned char __state_; -}; - 
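// Illustrative sketch, not part of the upstream header: path::iterator is a
// stashing bidirectional iterator over the elements of the path, so a
// range-based for loop visits the root and each component in turn. Note also
// (from operator/= earlier in the class) that appending an absolute path
// replaces the whole path rather than concatenating.
//
//     namespace fs = std::experimental::filesystem;
//
//     fs::path p = "/tmp/build/out.o";
//     for (const fs::path& part : p) {
//         // visits "/", "tmp", "build", "out.o" in order
//     }
//
//     fs::path q = fs::path("/a/b") / "c";    // "/a/b/c"
//     fs::path r = fs::path("/a/b") / "/etc"; // "/etc" -- absolute rhs replaces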
-inline _LIBCPP_INLINE_VISIBILITY -bool operator==(const path::iterator& __lhs, const path::iterator& __rhs) { - return __lhs.__path_ptr_ == __rhs.__path_ptr_ && - __lhs.__entry_.data() == __rhs.__entry_.data(); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool operator!=(const path::iterator& __lhs, const path::iterator& __rhs) { - return !(__lhs == __rhs); -} - -class _LIBCPP_EXCEPTION_ABI filesystem_error : public system_error -{ -public: - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, error_code __ec) - : system_error(__ec, __what), - __paths_(make_shared<_Storage>(path(), path())) - {} - - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, const path& __p1, error_code __ec) - : system_error(__ec, __what), - __paths_(make_shared<_Storage>(__p1, path())) - {} - - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, const path& __p1, const path& __p2, - error_code __ec) - : system_error(__ec, __what), - __paths_(make_shared<_Storage>(__p1, __p2)) - {} - - _LIBCPP_INLINE_VISIBILITY - const path& path1() const _NOEXCEPT { - return __paths_->first; - } - - _LIBCPP_INLINE_VISIBILITY - const path& path2() const _NOEXCEPT { - return __paths_->second; - } - - ~filesystem_error() override; // key function - - // TODO(ericwf): Create a custom error message. - //const char* what() const _NOEXCEPT; - -private: - typedef pair _Storage; - shared_ptr<_Storage> __paths_; -}; - -template -_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE -#ifndef _LIBCPP_NO_EXCEPTIONS -void __throw_filesystem_error(_Args && ...__args) -{ - throw filesystem_error(std::forward<_Args>(__args)...); -} -#else -void __throw_filesystem_error(_Args&&...) -{ - _VSTD::abort(); -} -#endif +using namespace _VSTD_FS; +_LIBCPP_END_NAMESPACE_EXPERIMENTAL_FILESYSTEM -// operational functions - -_LIBCPP_FUNC_VIS -path __absolute(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -path __canonical(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -void __copy(const path& __from, const path& __to, copy_options __opt, - error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -bool __copy_file(const path& __from, const path& __to, copy_options __opt, - error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -void __copy_symlink(const path& __existing_symlink, const path& __new_symlink, - error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -bool __create_directories(const path& p, error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -bool __create_directory(const path& p, error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -bool __create_directory(const path& p, const path & attributes, - error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -void __create_directory_symlink(const path& __to, const path& __new_symlink, - error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -void __create_hard_link(const path& __to, const path& __new_hard_link, - error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -void __create_symlink(const path& __to, const path& __new_symlink, - error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -path __current_path(error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -void __current_path(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -bool __equivalent(const path&, const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __file_size(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __hard_link_count(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -bool __fs_is_empty(const path& p, error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -file_time_type __last_write_time(const path& p, error_code *ec=nullptr); 
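// Illustrative sketch, not part of the upstream header: each __-prefixed
// operational function above takes an optional error_code*, and every public
// wrapper comes in two flavors -- a throwing overload that raises
// filesystem_error on failure, and an error_code overload that reports
// failure without throwing.
//
//     namespace fs = std::experimental::filesystem;
//
//     // Throwing flavor: failure raises fs::filesystem_error.
//     std::uintmax_t n = fs::file_size("/var/log/syslog");
//
//     // error_code flavor: failure is reported through the out-parameter.
//     std::error_code ec;
//     std::uintmax_t m = fs::file_size("/var/log/syslog", ec);
//     if (ec)
//         std::fprintf(stderr, "file_size: %s\n", ec.message().c_str());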
-_LIBCPP_FUNC_VIS -void __last_write_time(const path& p, file_time_type new_time, - error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -void __permissions(const path&, perms, perm_options, error_code* = nullptr); -_LIBCPP_FUNC_VIS -path __read_symlink(const path& p, error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -bool __remove(const path& p, error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __remove_all(const path& p, error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -void __rename(const path& from, const path& to, error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -void __resize_file(const path& p, uintmax_t size, error_code *ec=nullptr); -_LIBCPP_FUNC_VIS -space_info __space(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -file_status __status(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -file_status __symlink_status(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -path __system_complete(const path&, error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -path __temp_directory_path(error_code *__ec=nullptr); -_LIBCPP_FUNC_VIS -path __weakly_canonical(path const& __p, error_code *__ec=nullptr); - -inline _LIBCPP_INLINE_VISIBILITY -path current_path() { - return __current_path(); -} - -inline _LIBCPP_INLINE_VISIBILITY -path current_path(error_code& __ec) { - return __current_path(&__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void current_path(const path& __p) { - __current_path(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -void current_path(const path& __p, error_code& __ec) _NOEXCEPT { - __current_path(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -path absolute(const path& __p) { - return __absolute(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -path absolute(const path& __p, error_code &__ec) { - return __absolute(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -path canonical(const path& __p) { - return __canonical(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -path canonical(const path& __p, error_code& __ec) { - return __canonical(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void copy(const path& __from, const path& __to) { - __copy(__from, __to, copy_options::none); -} - -inline _LIBCPP_INLINE_VISIBILITY -void copy(const path& __from, const path& __to, error_code& __ec) { - __copy(__from, __to, copy_options::none, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void copy(const path& __from, const path& __to, copy_options __opt) { - __copy(__from, __to, __opt); -} - -inline _LIBCPP_INLINE_VISIBILITY -void copy(const path& __from, const path& __to, - copy_options __opt, error_code& __ec) { - __copy(__from, __to, __opt, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool copy_file(const path& __from, const path& __to) { - return __copy_file(__from, __to, copy_options::none); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool copy_file(const path& __from, const path& __to, error_code& __ec) { - return __copy_file(__from, __to, copy_options::none, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool copy_file(const path& __from, const path& __to, copy_options __opt) { - return __copy_file(__from, __to, __opt); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool copy_file(const path& __from, const path& __to, - copy_options __opt, error_code& __ec){ - return __copy_file(__from, __to, __opt, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void copy_symlink(const path& __existing, const path& __new) { - __copy_symlink(__existing, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY -void copy_symlink(const path& __ext, const path& __new, error_code& __ec) _NOEXCEPT { - __copy_symlink(__ext, 
__new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool create_directories(const path& __p) { - return __create_directories(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool create_directories(const path& __p, error_code& __ec) { - return __create_directories(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool create_directory(const path& __p) { - return __create_directory(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool create_directory(const path& __p, error_code& __ec) _NOEXCEPT { - return __create_directory(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool create_directory(const path& __p, const path& __attrs) { - return __create_directory(__p, __attrs); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool create_directory(const path& __p, const path& __attrs, error_code& __ec) _NOEXCEPT { - return __create_directory(__p, __attrs, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void create_directory_symlink(const path& __to, const path& __new) { - __create_directory_symlink(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY -void create_directory_symlink(const path& __to, const path& __new, - error_code& __ec) _NOEXCEPT { - __create_directory_symlink(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void create_hard_link(const path& __to, const path& __new) { - __create_hard_link(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY -void create_hard_link(const path& __to, const path& __new, error_code& __ec) _NOEXCEPT { - __create_hard_link(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void create_symlink(const path& __to, const path& __new) { - __create_symlink(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY -void create_symlink(const path& __to, const path& __new, - error_code& __ec) _NOEXCEPT { - return __create_symlink(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool status_known(file_status __s) _NOEXCEPT { - return __s.type() != file_type::none; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool exists(file_status __s) _NOEXCEPT { - return status_known(__s) && __s.type() != file_type::not_found; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool exists(const path& __p) { - return exists(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool exists(const path& __p, error_code& __ec) _NOEXCEPT { - auto __s = __status(__p, &__ec); - if (status_known(__s)) __ec.clear(); - return exists(__s); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool equivalent(const path& __p1, const path& __p2) { - return __equivalent(__p1, __p2); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool equivalent(const path& __p1, const path& __p2, error_code& __ec) _NOEXCEPT { - return __equivalent(__p1, __p2, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -uintmax_t file_size(const path& __p) { - return __file_size(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -uintmax_t file_size(const path& __p, error_code& __ec) _NOEXCEPT { - return __file_size(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -uintmax_t hard_link_count(const path& __p) { - return __hard_link_count(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -uintmax_t hard_link_count(const path& __p, error_code& __ec) _NOEXCEPT { - return __hard_link_count(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_block_file(file_status __s) _NOEXCEPT { - return __s.type() == file_type::block; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_block_file(const path& __p) { - return is_block_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_block_file(const path& __p, error_code& __ec) 
_NOEXCEPT { - return is_block_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_character_file(file_status __s) _NOEXCEPT { - return __s.type() == file_type::character; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_character_file(const path& __p) { - return is_character_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_character_file(const path& __p, error_code& __ec) _NOEXCEPT { - return is_character_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_directory(file_status __s) _NOEXCEPT { - return __s.type() == file_type::directory; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_directory(const path& __p) { - return is_directory(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_directory(const path& __p, error_code& __ec) _NOEXCEPT { - return is_directory(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_empty(const path& __p) { - return __fs_is_empty(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_empty(const path& __p, error_code& __ec) { - return __fs_is_empty(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_fifo(file_status __s) _NOEXCEPT { - return __s.type() == file_type::fifo; -} -inline _LIBCPP_INLINE_VISIBILITY -bool is_fifo(const path& __p) { - return is_fifo(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_fifo(const path& __p, error_code& __ec) _NOEXCEPT { - return is_fifo(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_regular_file(file_status __s) _NOEXCEPT { - return __s.type() == file_type::regular; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_regular_file(const path& __p) { - return is_regular_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_regular_file(const path& __p, error_code& __ec) _NOEXCEPT { - return is_regular_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_socket(file_status __s) _NOEXCEPT { - return __s.type() == file_type::socket; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_socket(const path& __p) { - return is_socket(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_socket(const path& __p, error_code& __ec) _NOEXCEPT { - return is_socket(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_symlink(file_status __s) _NOEXCEPT { - return __s.type() == file_type::symlink; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_symlink(const path& __p) { - return is_symlink(__symlink_status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_symlink(const path& __p, error_code& __ec) _NOEXCEPT { - return is_symlink(__symlink_status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_other(file_status __s) _NOEXCEPT { - return exists(__s) - && !is_regular_file(__s) && !is_directory(__s) && !is_symlink(__s); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_other(const path& __p) { - return is_other(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool is_other(const path& __p, error_code& __ec) _NOEXCEPT { - return is_other(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY -file_time_type last_write_time(const path& __p) { - return __last_write_time(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -file_time_type last_write_time(const path& __p, error_code& __ec) _NOEXCEPT { - return __last_write_time(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void last_write_time(const path& __p, file_time_type __t) { - __last_write_time(__p, __t); -} - -inline _LIBCPP_INLINE_VISIBILITY -void 
last_write_time(const path& __p, file_time_type __t, error_code& __ec) _NOEXCEPT { - __last_write_time(__p, __t, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void permissions(const path& __p, perms __prms, - perm_options __opts = perm_options::replace) { - __permissions(__p, __prms, __opts); -} - -inline _LIBCPP_INLINE_VISIBILITY -void permissions(const path& __p, perms __prms, error_code& __ec) _NOEXCEPT { - __permissions(__p, __prms, perm_options::replace, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void permissions(const path& __p, perms __prms, perm_options __opts, - error_code& __ec) { - __permissions(__p, __prms, __opts, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -path proximate(const path& __p, const path& __base, error_code& __ec) { - path __tmp = __weakly_canonical(__p, &__ec); - if (__ec) - return {}; - path __tmp_base = __weakly_canonical(__base, &__ec); - if (__ec) - return {}; - return __tmp.lexically_proximate(__tmp_base); -} - -inline _LIBCPP_INLINE_VISIBILITY -path proximate(const path& __p, error_code& __ec) { - return proximate(__p, current_path(), __ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -path proximate(const path& __p, const path& __base = current_path()) { - return __weakly_canonical(__p).lexically_proximate(__weakly_canonical(__base)); -} - -inline _LIBCPP_INLINE_VISIBILITY -path read_symlink(const path& __p) { - return __read_symlink(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -path read_symlink(const path& __p, error_code& __ec) { - return __read_symlink(__p, &__ec); -} - - -inline _LIBCPP_INLINE_VISIBILITY -path relative(const path& __p, const path& __base, error_code& __ec) { - path __tmp = __weakly_canonical(__p, &__ec); - if (__ec) - return path(); - path __tmpbase = __weakly_canonical(__base, &__ec); - if (__ec) - return path(); - return __tmp.lexically_relative(__tmpbase); -} - -inline _LIBCPP_INLINE_VISIBILITY -path relative(const path& __p, error_code& __ec) { - return relative(__p, current_path(), __ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -path relative(const path& __p, const path& __base=current_path()) { - return __weakly_canonical(__p).lexically_relative(__weakly_canonical(__base)); -} - - -inline _LIBCPP_INLINE_VISIBILITY -bool remove(const path& __p) { - return __remove(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -bool remove(const path& __p, error_code& __ec) _NOEXCEPT { - return __remove(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -uintmax_t remove_all(const path& __p) { - return __remove_all(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -uintmax_t remove_all(const path& __p, error_code& __ec) { - return __remove_all(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void rename(const path& __from, const path& __to) { - return __rename(__from, __to); -} - -inline _LIBCPP_INLINE_VISIBILITY -void rename(const path& __from, const path& __to, error_code& __ec) _NOEXCEPT { - return __rename(__from, __to, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -void resize_file(const path& __p, uintmax_t __ns) { - return __resize_file(__p, __ns); -} - -inline _LIBCPP_INLINE_VISIBILITY -void resize_file(const path& __p, uintmax_t __ns, error_code& __ec) _NOEXCEPT { - return __resize_file(__p, __ns, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -space_info space(const path& __p) { - return __space(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -space_info space(const path& __p, error_code& __ec) _NOEXCEPT { - return __space(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -file_status status(const path& __p) { - return __status(__p); 
-} - -inline _LIBCPP_INLINE_VISIBILITY -file_status status(const path& __p, error_code& __ec) _NOEXCEPT { - return __status(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -file_status symlink_status(const path& __p) { - return __symlink_status(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -file_status symlink_status(const path& __p, error_code& __ec) _NOEXCEPT { - return __symlink_status(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -path temp_directory_path() { - return __temp_directory_path(); -} - -inline _LIBCPP_INLINE_VISIBILITY -path temp_directory_path(error_code& __ec) { - return __temp_directory_path(&__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY -path weakly_canonical(path const& __p) { - return __weakly_canonical(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY -path weakly_canonical(path const& __p, error_code& __ec) { - return __weakly_canonical(__p, &__ec); -} - - -class directory_entry -{ - typedef _VSTD_FS::path _Path; - -public: - // constructors and destructors - directory_entry() _NOEXCEPT = default; - directory_entry(directory_entry const&) = default; - directory_entry(directory_entry&&) _NOEXCEPT = default; - - _LIBCPP_INLINE_VISIBILITY - explicit directory_entry(_Path const& __p) : __p_(__p) {} - - ~directory_entry() {} - - directory_entry& operator=(directory_entry const&) = default; - directory_entry& operator=(directory_entry&&) _NOEXCEPT = default; - - _LIBCPP_INLINE_VISIBILITY - void assign(_Path const& __p) { - __p_ = __p; - } - - _LIBCPP_INLINE_VISIBILITY - void replace_filename(_Path const& __p) { - __p_ = __p_.parent_path() / __p; - } - - _LIBCPP_INLINE_VISIBILITY - _Path const& path() const _NOEXCEPT { - return __p_; - } - - _LIBCPP_INLINE_VISIBILITY - operator const _Path&() const _NOEXCEPT { - return __p_; - } - - _LIBCPP_INLINE_VISIBILITY - file_status status() const { - return _VSTD_FS::status(__p_); - } - - _LIBCPP_INLINE_VISIBILITY - file_status status(error_code& __ec) const _NOEXCEPT { - return _VSTD_FS::status(__p_, __ec); - } - - _LIBCPP_INLINE_VISIBILITY - file_status symlink_status() const { - return _VSTD_FS::symlink_status(__p_); - } - - _LIBCPP_INLINE_VISIBILITY - file_status symlink_status(error_code& __ec) const _NOEXCEPT { - return _VSTD_FS::symlink_status(__p_, __ec); - } - - _LIBCPP_INLINE_VISIBILITY - bool operator< (directory_entry const& __rhs) const _NOEXCEPT { - return __p_ < __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator==(directory_entry const& __rhs) const _NOEXCEPT { - return __p_ == __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator!=(directory_entry const& __rhs) const _NOEXCEPT { - return __p_ != __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator<=(directory_entry const& __rhs) const _NOEXCEPT { - return __p_ <= __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator> (directory_entry const& __rhs) const _NOEXCEPT { - return __p_ > __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator>=(directory_entry const& __rhs) const _NOEXCEPT { - return __p_ >= __rhs.__p_; - } -private: - _Path __p_; -}; - - -class directory_iterator; -class recursive_directory_iterator; -class __dir_stream; - -class __dir_element_proxy { -public: - - inline _LIBCPP_INLINE_VISIBILITY - directory_entry operator*() { return _VSTD::move(__elem_); } - -private: - friend class directory_iterator; - friend class recursive_directory_iterator; - explicit __dir_element_proxy(directory_entry const& __e) : __elem_(__e) {} - __dir_element_proxy(__dir_element_proxy&& __o) : __elem_(_VSTD::move(__o.__elem_)) {} 
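// Illustrative sketch, not part of the upstream header: directory_iterator
// (declared just below) is an input iterator over the entries of a single
// directory, and the begin/end free functions make it usable directly in a
// range-based for loop.
//
//     namespace fs = std::experimental::filesystem;
//
//     for (const fs::directory_entry& e : fs::directory_iterator("/tmp")) {
//         if (fs::is_regular_file(e.status()))
//             use(e.path()); // use() is a hypothetical callback
//     }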
- directory_entry __elem_; -}; - -class directory_iterator -{ -public: - typedef directory_entry value_type; - typedef ptrdiff_t difference_type; - typedef value_type const* pointer; - typedef value_type const& reference; - typedef input_iterator_tag iterator_category; - -public: - //ctor & dtor - directory_iterator() _NOEXCEPT - { } - - explicit directory_iterator(const path& __p) - : directory_iterator(__p, nullptr) - { } - - directory_iterator(const path& __p, directory_options __opts) - : directory_iterator(__p, nullptr, __opts) - { } - - directory_iterator(const path& __p, error_code& __ec) - : directory_iterator(__p, &__ec) - { } - - directory_iterator(const path& __p, directory_options __opts, - error_code& __ec) - : directory_iterator(__p, &__ec, __opts) - { } - - directory_iterator(const directory_iterator&) = default; - directory_iterator(directory_iterator&&) = default; - directory_iterator& operator=(const directory_iterator&) = default; - - directory_iterator& operator=(directory_iterator&& __o) _NOEXCEPT { - // non-default implementation provided to support self-move assign. - if (this != &__o) { - __imp_ = _VSTD::move(__o.__imp_); - } - return *this; - } - - ~directory_iterator() = default; - - const directory_entry& operator*() const { - _LIBCPP_ASSERT(__imp_, "The end iterator cannot be dereferenced"); - return __dereference(); - } - - const directory_entry* operator->() const - { return &**this; } - - directory_iterator& operator++() - { return __increment(); } - - __dir_element_proxy operator++(int) { - __dir_element_proxy __p(**this); - __increment(); - return __p; - } - - directory_iterator& increment(error_code& __ec) - { return __increment(&__ec); } - -private: - inline _LIBCPP_INLINE_VISIBILITY - friend bool operator==(const directory_iterator& __lhs, - const directory_iterator& __rhs) _NOEXCEPT; - - // construct the dir_stream - _LIBCPP_FUNC_VIS - directory_iterator(const path&, error_code *, - directory_options = directory_options::none); - - _LIBCPP_FUNC_VIS - directory_iterator& __increment(error_code * __ec = nullptr); - - _LIBCPP_FUNC_VIS - const directory_entry& __dereference() const; - -private: - shared_ptr<__dir_stream> __imp_; -}; - - -inline _LIBCPP_INLINE_VISIBILITY -bool operator==(const directory_iterator& __lhs, - const directory_iterator& __rhs) _NOEXCEPT { - return __lhs.__imp_ == __rhs.__imp_; -} - -inline _LIBCPP_INLINE_VISIBILITY -bool operator!=(const directory_iterator& __lhs, - const directory_iterator& __rhs) _NOEXCEPT { - return !(__lhs == __rhs); -} - -// enable directory_iterator range-based for statements -inline _LIBCPP_INLINE_VISIBILITY -directory_iterator begin(directory_iterator __iter) _NOEXCEPT { - return __iter; -} - -inline _LIBCPP_INLINE_VISIBILITY -directory_iterator end(const directory_iterator&) _NOEXCEPT { - return directory_iterator(); -} - -class recursive_directory_iterator { -public: - using value_type = directory_entry; - using difference_type = std::ptrdiff_t; - using pointer = directory_entry const *; - using reference = directory_entry const &; - using iterator_category = std::input_iterator_tag; - -public: - // constructors and destructor - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator() _NOEXCEPT - : __rec_(false) - {} - - _LIBCPP_INLINE_VISIBILITY - explicit recursive_directory_iterator(const path& __p, - directory_options __xoptions = directory_options::none) - : recursive_directory_iterator(__p, __xoptions, nullptr) - { } - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator(const path& __p, - 
directory_options __xoptions, error_code& __ec) - : recursive_directory_iterator(__p, __xoptions, &__ec) - { } - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator(const path& __p, error_code& __ec) - : recursive_directory_iterator(__p, directory_options::none, &__ec) - { } - - recursive_directory_iterator(const recursive_directory_iterator&) = default; - recursive_directory_iterator(recursive_directory_iterator&&) = default; - - recursive_directory_iterator & - operator=(const recursive_directory_iterator&) = default; - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator & - operator=(recursive_directory_iterator&& __o) noexcept { - // non-default implementation provided to support self-move assign. - if (this != &__o) { - __imp_ = _VSTD::move(__o.__imp_); - __rec_ = __o.__rec_; - } - return *this; - } - - ~recursive_directory_iterator() = default; - - _LIBCPP_INLINE_VISIBILITY - const directory_entry& operator*() const - { return __dereference(); } - - _LIBCPP_INLINE_VISIBILITY - const directory_entry* operator->() const - { return &__dereference(); } - - recursive_directory_iterator& operator++() - { return __increment(); } - - _LIBCPP_INLINE_VISIBILITY - __dir_element_proxy operator++(int) { - __dir_element_proxy __p(**this); - __increment(); - return __p; - } - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator& increment(error_code& __ec) - { return __increment(&__ec); } - - _LIBCPP_FUNC_VIS directory_options options() const; - _LIBCPP_FUNC_VIS int depth() const; - - _LIBCPP_INLINE_VISIBILITY - void pop() { __pop(); } - - _LIBCPP_INLINE_VISIBILITY - void pop(error_code& __ec) - { __pop(&__ec); } - - _LIBCPP_INLINE_VISIBILITY - bool recursion_pending() const - { return __rec_; } - - _LIBCPP_INLINE_VISIBILITY - void disable_recursion_pending() - { __rec_ = false; } - -private: - recursive_directory_iterator(const path& __p, directory_options __opt, - error_code *__ec); - - _LIBCPP_FUNC_VIS - const directory_entry& __dereference() const; - - _LIBCPP_FUNC_VIS - bool __try_recursion(error_code* __ec); - - _LIBCPP_FUNC_VIS - void __advance(error_code* __ec=nullptr); - - _LIBCPP_FUNC_VIS - recursive_directory_iterator& __increment(error_code *__ec=nullptr); - - _LIBCPP_FUNC_VIS - void __pop(error_code* __ec=nullptr); - - inline _LIBCPP_INLINE_VISIBILITY - friend bool operator==(const recursive_directory_iterator&, - const recursive_directory_iterator&) _NOEXCEPT; - - struct __shared_imp; - shared_ptr<__shared_imp> __imp_; - bool __rec_; -}; // class recursive_directory_iterator - - -inline _LIBCPP_INLINE_VISIBILITY -bool operator==(const recursive_directory_iterator& __lhs, - const recursive_directory_iterator& __rhs) _NOEXCEPT -{ - return __lhs.__imp_ == __rhs.__imp_; -} - -_LIBCPP_INLINE_VISIBILITY -inline bool operator!=(const recursive_directory_iterator& __lhs, - const recursive_directory_iterator& __rhs) _NOEXCEPT -{ - return !(__lhs == __rhs); -} -// enable recursive_directory_iterator range-based for statements -inline _LIBCPP_INLINE_VISIBILITY -recursive_directory_iterator begin(recursive_directory_iterator __iter) _NOEXCEPT { - return __iter; -} - -inline _LIBCPP_INLINE_VISIBILITY -recursive_directory_iterator end(const recursive_directory_iterator&) _NOEXCEPT { - return recursive_directory_iterator(); -} +#endif // !_LIBCPP_CXX03_LANG -_LIBCPP_END_NAMESPACE_EXPERIMENTAL_FILESYSTEM +_LIBCPP_POP_MACROS #endif // _LIBCPP_EXPERIMENTAL_FILESYSTEM diff --git a/include/c++/v1/experimental/memory_resource b/include/c++/v1/experimental/memory_resource index 
d101f3e..221ce5b 100644 --- a/include/c++/v1/experimental/memory_resource +++ b/include/c++/v1/experimental/memory_resource @@ -71,7 +71,7 @@ namespace pmr { #include #include #include -#include +#include <__tuple> #include #include #include @@ -96,7 +96,7 @@ size_t __aligned_allocation_size(size_t __s, size_t __a) _NOEXCEPT } // 8.5, memory.resource -class _LIBCPP_TEMPLATE_VIS memory_resource +class _LIBCPP_TYPE_VIS memory_resource { static const size_t __max_align = alignof(max_align_t); @@ -206,7 +206,7 @@ public: void construct(_Tp* __p, _Ts &&... __args) { _VSTD_LFTS::__lfts_user_alloc_construct( - __p, resource(), _VSTD::forward<_Ts>(__args)... + __p, *this, _VSTD::forward<_Ts>(__args)... ); } @@ -218,14 +218,14 @@ public: ::new ((void*)__p) pair<_T1, _T2>(piecewise_construct , __transform_tuple( typename __lfts_uses_alloc_ctor< - _T1, memory_resource*, _Args1... + _T1, polymorphic_allocator&, _Args1... >::type() , _VSTD::move(__x) , typename __make_tuple_indices::type{} ) , __transform_tuple( typename __lfts_uses_alloc_ctor< - _T2, memory_resource*, _Args2... + _T2, polymorphic_allocator&, _Args2... >::type() , _VSTD::move(__y) , typename __make_tuple_indices::type{} @@ -289,23 +289,23 @@ private: template _LIBCPP_INLINE_VISIBILITY - tuple + tuple __transform_tuple(integral_constant, tuple<_Args...> && __t, - __tuple_indices<_Idx...>) const + __tuple_indices<_Idx...>) { - using _Tup = tuple; - return _Tup(allocator_arg, resource(), + using _Tup = tuple; + return _Tup(allocator_arg, *this, _VSTD::get<_Idx>(_VSTD::move(__t))...); } template _LIBCPP_INLINE_VISIBILITY - tuple<_Args&&..., memory_resource*> + tuple<_Args&&..., polymorphic_allocator&> __transform_tuple(integral_constant, tuple<_Args...> && __t, - __tuple_indices<_Idx...>) const + __tuple_indices<_Idx...>) { - using _Tup = tuple<_Args&&..., memory_resource*>; - return _Tup(_VSTD::get<_Idx>(_VSTD::move(__t))..., resource()); + using _Tup = tuple<_Args&&..., polymorphic_allocator&>; + return _Tup(_VSTD::get<_Idx>(_VSTD::move(__t))..., *this); } _LIBCPP_INLINE_VISIBILITY diff --git a/include/c++/v1/experimental/simd b/include/c++/v1/experimental/simd new file mode 100644 index 0000000..6580443 --- /dev/null +++ b/include/c++/v1/experimental/simd @@ -0,0 +1,1570 @@ +// -*- C++ -*- +//===------------------------------- simd ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef _LIBCPP_EXPERIMENTAL_SIMD +#define _LIBCPP_EXPERIMENTAL_SIMD + +/* + experimental/simd synopsis + +namespace std::experimental { + +inline namespace parallelism_v2 { + +namespace simd_abi { + +struct scalar {}; +template struct fixed_size {}; +template inline constexpr int max_fixed_size = implementation-defined; +template using compatible = implementation-defined; +template using native = implementation-defined; + +} // simd_abi + +struct element_aligned_tag {}; +struct vector_aligned_tag {}; +template struct overaligned_tag {}; +inline constexpr element_aligned_tag element_aligned{}; +inline constexpr vector_aligned_tag vector_aligned{}; +template inline constexpr overaligned_tag overaligned{}; + +// traits [simd.traits] +template struct is_abi_tag; +template inline constexpr bool is_abi_tag_v = is_abi_tag::value; + +template struct is_simd; +template inline constexpr bool is_simd_v = is_simd::value; + +template struct is_simd_mask; +template inline constexpr bool is_simd_mask_v = is_simd_mask::value; + +template struct is_simd_flag_type; +template inline constexpr bool is_simd_flag_type_v = is_simd_flag_type::value; + +template struct abi_for_size { using type = see below; }; +template using abi_for_size_t = typename abi_for_size::type; + +template > struct simd_size; +template > +inline constexpr size_t simd_size_v = simd_size::value; + +template struct memory_alignment; +template +inline constexpr size_t memory_alignment_v = memory_alignment::value; + +// class template simd [simd.class] +template > class simd; +template using native_simd = simd>; +template using fixed_size_simd = simd>; + +// class template simd_mask [simd.mask.class] +template > class simd_mask; +template using native_simd_mask = simd_mask>; +template using fixed_size_simd_mask = simd_mask>; + +// casts [simd.casts] +template see below simd_cast(const simd&); +template see below static_simd_cast(const simd&); + +template +fixed_size_simd> to_fixed_size(const simd&) noexcept; +template +fixed_size_simd_mask> to_fixed_size(const simd_mask&) noexcept; +template native_simd to_native(const fixed_size_simd&) noexcept; +template +native_simd_mask to_native(const fixed_size_simd_mask> &) noexcept; +template simd to_compatible(const fixed_size_simd&) noexcept; +template simd_mask to_compatible(const fixed_size_simd_mask&) noexcept; + +template +tuple>...> split(const simd&); +template +tuple>...> split(const simd_mask&); +template +array / V::size()> split( +const simd&); +template +array / V::size()> split( +const simd_mask&); + +template +simd + ...)>> concat(const simd&...); +template +simd_mask + ...)>> concat(const simd_mask&...); + +// reductions [simd.mask.reductions] +template bool all_of(const simd_mask&) noexcept; +template bool any_of(const simd_mask&) noexcept; +template bool none_of(const simd_mask&) noexcept; +template bool some_of(const simd_mask&) noexcept; +template int popcount(const simd_mask&) noexcept; +template int find_first_set(const simd_mask&); +template int find_last_set(const simd_mask&); + +bool all_of(see below) noexcept; +bool any_of(see below) noexcept; +bool none_of(see below) noexcept; +bool some_of(see below) noexcept; +int popcount(see below) noexcept; +int find_first_set(see below) noexcept; +int find_last_set(see below) noexcept; + +// masked assignment [simd.whereexpr] +template class const_where_expression; +template class where_expression; + +// masked assignment [simd.mask.where] +template 
struct nodeduce { using type = T; }; // exposition only + +template using nodeduce_t = typename nodeduce::type; // exposition only + +template +where_expression, simd> +where(const typename simd::mask_type&, simd&) noexcept; + +template +const_where_expression, const simd> +where(const typename simd::mask_type&, const simd&) noexcept; + +template +where_expression, simd_mask> +where(const nodeduce_t>&, simd_mask&) noexcept; + +template +const_where_expression, const simd_mask> +where(const nodeduce_t>&, const simd_mask&) noexcept; + +template where_expression where(see below k, T& d) noexcept; + +template +const_where_expression where(see below k, const T& d) noexcept; + +// reductions [simd.reductions] +template > +T reduce(const simd&, BinaryOperation = BinaryOperation()); + +template +typename V::value_type reduce(const const_where_expression& x, +typename V::value_type neutral_element, BinaryOperation binary_op); + +template +typename V::value_type reduce(const const_where_expression& x, plus<> binary_op = plus<>()); + +template +typename V::value_type reduce(const const_where_expression& x, multiplies<> binary_op); + +template +typename V::value_type reduce(const const_where_expression& x, bit_and<> binary_op); + +template +typename V::value_type reduce(const const_where_expression& x, bit_or<> binary_op); + +template +typename V::value_type reduce(const const_where_expression& x, bit_xor<> binary_op); + +template T hmin(const simd&); +template T hmin(const const_where_expression&); +template T hmax(const simd&); +template T hmax(const const_where_expression&); + +// algorithms [simd.alg] +template simd min(const simd&, const simd&) noexcept; + +template simd max(const simd&, const simd&) noexcept; + +template +std::pair, simd> minmax(const simd&, const simd&) noexcept; + +template +simd clamp(const simd& v, const simd& lo, const simd& hi); + +// [simd.whereexpr] +template +class const_where_expression { + const M& mask; // exposition only + T& data; // exposition only +public: + const_where_expression(const const_where_expression&) = delete; + const_where_expression& operator=(const const_where_expression&) = delete; + remove_const_t operator-() const &&; + template void copy_to(U* mem, Flags f) const &&; +}; + +template +class where_expression : public const_where_expression { +public: + where_expression(const where_expression&) = delete; + where_expression& operator=(const where_expression&) = delete; + template void operator=(U&& x); + template void operator+=(U&& x); + template void operator-=(U&& x); + template void operator*=(U&& x); + template void operator/=(U&& x); + template void operator%=(U&& x); + template void operator&=(U&& x); + template void operator|=(U&& x); + template void operator^=(U&& x); + template void operator<<=(U&& x); + template void operator>>=(U&& x); + void operator++(); + void operator++(int); + void operator--(); + void operator--(int); + template void copy_from(const U* mem, Flags); +}; + +// [simd.class] +template class simd { +public: + using value_type = T; + using reference = see below; + using mask_type = simd_mask; + + using abi_type = Abi; + static constexpr size_t size() noexcept; + simd() = default; + + // implicit type conversion constructor + template simd(const simd>&); + + // implicit broadcast constructor (see below for constraints) + template simd(U&& value); + + // generator constructor (see below for constraints) + template explicit simd(G&& gen); + + // load constructor + template simd(const U* mem, Flags f); + + // loads 
[simd.load] + template void copy_from(const U* mem, Flags f); + + // stores [simd.store] + template void copy_to(U* mem, Flags f) const; + + // scalar access [simd.subscr] + reference operator[](size_t); + value_type operator[](size_t) const; + + // unary operators [simd.unary] + simd& operator++(); + simd operator++(int); + simd& operator--(); + simd operator--(int); + mask_type operator!() const; + simd operator~() const; // see below + simd operator+() const; + simd operator-() const; + + // binary operators [simd.binary] + friend simd operator+ (const simd&, const simd&); + friend simd operator- (const simd&, const simd&); + friend simd operator* (const simd&, const simd&); + friend simd operator/ (const simd&, const simd&); + friend simd operator% (const simd&, const simd&); + friend simd operator& (const simd&, const simd&); + friend simd operator| (const simd&, const simd&); + friend simd operator^ (const simd&, const simd&); + friend simd operator<<(const simd&, const simd&); + friend simd operator>>(const simd&, const simd&); + friend simd operator<<(const simd&, int); + friend simd operator>>(const simd&, int); + + // compound assignment [simd.cassign] + friend simd& operator+= (simd&, const simd&); + friend simd& operator-= (simd&, const simd&); + friend simd& operator*= (simd&, const simd&); + friend simd& operator/= (simd&, const simd&); + friend simd& operator%= (simd&, const simd&); + + friend simd& operator&= (simd&, const simd&); + friend simd& operator|= (simd&, const simd&); + friend simd& operator^= (simd&, const simd&); + friend simd& operator<<=(simd&, const simd&); + friend simd& operator>>=(simd&, const simd&); + friend simd& operator<<=(simd&, int); + friend simd& operator>>=(simd&, int); + + // compares [simd.comparison] + friend mask_type operator==(const simd&, const simd&); + friend mask_type operator!=(const simd&, const simd&); + friend mask_type operator>=(const simd&, const simd&); + friend mask_type operator<=(const simd&, const simd&); + friend mask_type operator> (const simd&, const simd&); + friend mask_type operator< (const simd&, const simd&); +}; + +// [simd.math] +template using scharv = simd; // exposition only +template using shortv = simd; // exposition only +template using intv = simd; // exposition only +template using longv = simd; // exposition only +template using llongv = simd; // exposition only +template using floatv = simd; // exposition only +template using doublev = simd; // exposition only +template using ldoublev = simd; // exposition only +template using samesize = fixed_size_simd; // exposition only + +template floatv acos(floatv x); +template doublev acos(doublev x); +template ldoublev acos(ldoublev x); + +template floatv asin(floatv x); +template doublev asin(doublev x); +template ldoublev asin(ldoublev x); + +template floatv atan(floatv x); +template doublev atan(doublev x); +template ldoublev atan(ldoublev x); + +template floatv atan2(floatv y, floatv x); +template doublev atan2(doublev y, doublev x); +template ldoublev atan2(ldoublev y, ldoublev x); + +template floatv cos(floatv x); +template doublev cos(doublev x); +template ldoublev cos(ldoublev x); + +template floatv sin(floatv x); +template doublev sin(doublev x); +template ldoublev sin(ldoublev x); + +template floatv tan(floatv x); +template doublev tan(doublev x); +template ldoublev tan(ldoublev x); + +template floatv acosh(floatv x); +template doublev acosh(doublev x); +template ldoublev acosh(ldoublev x); + +template floatv asinh(floatv x); +template doublev 
asinh(doublev x); +template ldoublev asinh(ldoublev x); + +template floatv atanh(floatv x); +template doublev atanh(doublev x); +template ldoublev atanh(ldoublev x); + +template floatv cosh(floatv x); +template doublev cosh(doublev x); +template ldoublev cosh(ldoublev x); + +template floatv sinh(floatv x); +template doublev sinh(doublev x); +template ldoublev sinh(ldoublev x); + +template floatv tanh(floatv x); +template doublev tanh(doublev x); +template ldoublev tanh(ldoublev x); + +template floatv exp(floatv x); +template doublev exp(doublev x); +template ldoublev exp(ldoublev x); + +template floatv exp2(floatv x); +template doublev exp2(doublev x); +template ldoublev exp2(ldoublev x); + +template floatv expm1(floatv x); +template doublev expm1(doublev x); +template ldoublev expm1(ldoublev x); + +template floatv frexp(floatv value, samesize>* exp); +template doublev frexp(doublev value, samesize>* exp); +template ldoublev frexp(ldoublev value, samesize>* exp); + +template samesize> ilogb(floatv x); +template samesize> ilogb(doublev x); +template samesize> ilogb(ldoublev x); + +template floatv ldexp(floatv x, samesize> exp); +template doublev ldexp(doublev x, samesize> exp); +template ldoublev ldexp(ldoublev x, samesize> exp); + +template floatv log(floatv x); +template doublev log(doublev x); +template ldoublev log(ldoublev x); + +template floatv log10(floatv x); +template doublev log10(doublev x); +template ldoublev log10(ldoublev x); + +template floatv log1p(floatv x); +template doublev log1p(doublev x); +template ldoublev log1p(ldoublev x); + +template floatv log2(floatv x); +template doublev log2(doublev x); +template ldoublev log2(ldoublev x); + +template floatv logb(floatv x); +template doublev logb(doublev x); +template ldoublev logb(ldoublev x); + +template floatv modf(floatv value, floatv* iptr); +template doublev modf(doublev value, doublev* iptr); +template ldoublev modf(ldoublev value, ldoublev* iptr); + +template floatv scalbn(floatv x, samesize> n); +template doublev scalbn(doublev x, samesize> n); +template ldoublev scalbn(ldoublev x, samesize> n); +template floatv scalbln(floatv x, samesize> n); +template doublev scalbln(doublev x, samesize> n); +template ldoublev scalbln(ldoublev x, samesize> n); + +template floatv cbrt(floatv x); +template doublev cbrt(doublev x); +template ldoublev cbrt(ldoublev x); + +template scharv abs(scharv j); +template shortv abs(shortv j); +template intv abs(intv j); +template longv abs(longv j); +template llongv abs(llongv j); +template floatv abs(floatv j); +template doublev abs(doublev j); +template ldoublev abs(ldoublev j); + +template floatv hypot(floatv x, floatv y); +template doublev hypot(doublev x, doublev y); +template ldoublev hypot(doublev x, doublev y); +template floatv hypot(floatv x, floatv y, floatv z); +template doublev hypot(doublev x, doublev y, doublev z); +template ldoublev hypot(ldoublev x, ldoublev y, ldoublev z); + +template floatv pow(floatv x, floatv y); +template doublev pow(doublev x, doublev y); +template ldoublev pow(ldoublev x, ldoublev y); + +template floatv sqrt(floatv x); +template doublev sqrt(doublev x); +template ldoublev sqrt(ldoublev x); + +template floatv erf(floatv x); +template doublev erf(doublev x); +template ldoublev erf(ldoublev x); +template floatv erfc(floatv x); +template doublev erfc(doublev x); +template ldoublev erfc(ldoublev x); + +template floatv lgamma(floatv x); +template doublev lgamma(doublev x); +template ldoublev lgamma(ldoublev x); + +template floatv tgamma(floatv x); +template 
doublev tgamma(doublev x); +template ldoublev tgamma(ldoublev x); + +template floatv ceil(floatv x); +template doublev ceil(doublev x); +template ldoublev ceil(ldoublev x); + +template floatv floor(floatv x); +template doublev floor(doublev x); +template ldoublev floor(ldoublev x); + +template floatv nearbyint(floatv x); +template doublev nearbyint(doublev x); +template ldoublev nearbyint(ldoublev x); + +template floatv rint(floatv x); +template doublev rint(doublev x); +template ldoublev rint(ldoublev x); + +template samesize> lrint(floatv x); +template samesize> lrint(doublev x); +template samesize> lrint(ldoublev x); +template samesize> llrint(floatv x); +template samesize> llrint(doublev x); +template samesize> llrint(ldoublev x); + +template floatv round(floatv x); +template doublev round(doublev x); +template ldoublev round(ldoublev x); +template samesize> lround(floatv x); +template samesize> lround(doublev x); +template samesize> lround(ldoublev x); +template samesize> llround(floatv x); +template samesize> llround(doublev x); +template samesize> llround(ldoublev x); + +template floatv trunc(floatv x); +template doublev trunc(doublev x); +template ldoublev trunc(ldoublev x); + +template floatv fmod(floatv x, floatv y); +template doublev fmod(doublev x, doublev y); +template ldoublev fmod(ldoublev x, ldoublev y); + +template floatv remainder(floatv x, floatv y); +template doublev remainder(doublev x, doublev y); +template ldoublev remainder(ldoublev x, ldoublev y); + +template floatv remquo(floatv x, floatv y, samesize>* quo); +template doublev remquo(doublev x, doublev y, samesize>* quo); +template ldoublev remquo(ldoublev x, ldoublev y, samesize>* quo); + +template floatv copysign(floatv x, floatv y); +template doublev copysign(doublev x, doublev y); +template ldoublev copysign(ldoublev x, ldoublev y); + +template doublev nan(const char* tagp); +template floatv nanf(const char* tagp); +template ldoublev nanl(const char* tagp); + +template floatv nextafter(floatv x, floatv y); +template doublev nextafter(doublev x, doublev y); +template ldoublev nextafter(ldoublev x, ldoublev y); + +template floatv nexttoward(floatv x, ldoublev y); +template doublev nexttoward(doublev x, ldoublev y); +template ldoublev nexttoward(ldoublev x, ldoublev y); + +template floatv fdim(floatv x, floatv y); +template doublev fdim(doublev x, doublev y); +template ldoublev fdim(ldoublev x, ldoublev y); + +template floatv fmax(floatv x, floatv y); +template doublev fmax(doublev x, doublev y); +template ldoublev fmax(ldoublev x, ldoublev y); + +template floatv fmin(floatv x, floatv y); +template doublev fmin(doublev x, doublev y); +template ldoublev fmin(ldoublev x, ldoublev y); + +template floatv fma(floatv x, floatv y, floatv z); +template doublev fma(doublev x, doublev y, doublev z); +template ldoublev fma(ldoublev x, ldoublev y, ldoublev z); + +template samesize> fpclassify(floatv x); +template samesize> fpclassify(doublev x); +template samesize> fpclassify(ldoublev x); + +template simd_mask isfinite(floatv x); +template simd_mask isfinite(doublev x); +template simd_mask isfinite(ldoublev x); + +template simd_mask isinf(floatv x); +template simd_mask isinf(doublev x); +template simd_mask isinf(ldoublev x); + +template simd_mask isnan(floatv x); +template simd_mask isnan(doublev x); +template simd_mask isnan(ldoublev x); + +template simd_mask isnormal(floatv x); +template simd_mask isnormal(doublev x); +template simd_mask isnormal(ldoublev x); + +template simd_mask signbit(floatv x); +template simd_mask 
signbit(doublev x); +template simd_mask signbit(ldoublev x); + +template simd_mask isgreater(floatv x, floatv y); +template simd_mask isgreater(doublev x, doublev y); +template simd_mask isgreater(ldoublev x, ldoublev y); + +template simd_mask isgreaterequal(floatv x, floatv y); +template simd_mask isgreaterequal(doublev x, doublev y); +template simd_mask isgreaterequal(ldoublev x, ldoublev y); + +template simd_mask isless(floatv x, floatv y); +template simd_mask isless(doublev x, doublev y); +template simd_mask isless(ldoublev x, ldoublev y); + +template simd_mask islessequal(floatv x, floatv y); +template simd_mask islessequal(doublev x, doublev y); +template simd_mask islessequal(ldoublev x, ldoublev y); + +template simd_mask islessgreater(floatv x, floatv y); +template simd_mask islessgreater(doublev x, doublev y); +template simd_mask islessgreater(ldoublev x, ldoublev y); + +template simd_mask isunordered(floatv x, floatv y); +template simd_mask isunordered(doublev x, doublev y); +template simd_mask isunordered(ldoublev x, ldoublev y); + +template struct simd_div_t { V quot, rem; }; +template simd_div_t> div(scharv numer, scharv denom); +template simd_div_t> div(shortv numer, shortv denom); +template simd_div_t> div(intv numer, intv denom); +template simd_div_t> div(longv numer, longv denom); +template simd_div_t> div(llongv numer, llongv denom); + +// [simd.mask.class] +template +class simd_mask { +public: + using value_type = bool; + using reference = see below; + using simd_type = simd; + using abi_type = Abi; + static constexpr size_t size() noexcept; + simd_mask() = default; + + // broadcast constructor + explicit simd_mask(value_type) noexcept; + + // implicit type conversion constructor + template simd_mask(const simd_mask>&) noexcept; + + // load constructor + template simd_mask(const value_type* mem, Flags); + + // loads [simd.mask.copy] + template void copy_from(const value_type* mem, Flags); + template void copy_to(value_type* mem, Flags) const; + + // scalar access [simd.mask.subscr] + reference operator[](size_t); + value_type operator[](size_t) const; + + // unary operators [simd.mask.unary] + simd_mask operator!() const noexcept; + + // simd_mask binary operators [simd.mask.binary] + friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator& (const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator| (const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator^ (const simd_mask&, const simd_mask&) noexcept; + + // simd_mask compound assignment [simd.mask.cassign] + friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept; + friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept; + friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept; + + // simd_mask compares [simd.mask.comparison] + friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept; +}; + +} // parallelism_v2 +} // std::experimental + +*/ + +#include +#include +#include +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD + +#if _LIBCPP_STD_VER >= 17 + +enum class _StorageKind { + _Scalar, + _Array, + _VecExt, +}; + +template <_StorageKind __kind, int _Np> +struct __simd_abi {}; + +template +class __simd_storage {}; + +template +class 
__simd_storage<_Tp, __simd_abi<_StorageKind::_Array, __num_element>> { + std::array<_Tp, __num_element> __storage_; + + template + friend struct simd; + + template + friend struct simd_mask; + +public: + _Tp __get(size_t __index) const noexcept { return __storage_[__index]; }; + void __set(size_t __index, _Tp __val) noexcept { + __storage_[__index] = __val; + } +}; + +template +class __simd_storage<_Tp, __simd_abi<_StorageKind::_Scalar, 1>> { + _Tp __storage_; + + template + friend struct simd; + + template + friend struct simd_mask; + +public: + _Tp __get(size_t __index) const noexcept { return (&__storage_)[__index]; }; + void __set(size_t __index, _Tp __val) noexcept { + (&__storage_)[__index] = __val; + } +}; + +#ifndef _LIBCPP_HAS_NO_VECTOR_EXTENSION + +constexpr size_t __floor_pow_of_2(size_t __val) { + return ((__val - 1) & __val) == 0 ? __val + : __floor_pow_of_2((__val - 1) & __val); +} + +constexpr size_t __ceil_pow_of_2(size_t __val) { + return __val == 1 ? 1 : __floor_pow_of_2(__val - 1) << 1; +} + +template +struct __vec_ext_traits { +#if !defined(_LIBCPP_COMPILER_CLANG) + typedef _Tp type __attribute__((vector_size(__ceil_pow_of_2(__bytes)))); +#endif +}; + +#if defined(_LIBCPP_COMPILER_CLANG) +#define _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, _NUM_ELEMENT) \ + template <> \ + struct __vec_ext_traits<_TYPE, sizeof(_TYPE) * _NUM_ELEMENT> { \ + using type = \ + _TYPE __attribute__((vector_size(sizeof(_TYPE) * _NUM_ELEMENT))); \ + } + +#define _LIBCPP_SPECIALIZE_VEC_EXT_32(_TYPE) \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 1); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 2); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 3); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 4); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 5); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 6); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 7); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 8); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 9); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 10); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 11); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 12); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 13); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 14); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 15); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 16); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 17); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 18); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 19); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 20); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 21); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 22); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 23); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 24); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 25); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 26); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 27); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 28); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 29); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 30); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 31); \ + _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 32); + +_LIBCPP_SPECIALIZE_VEC_EXT_32(char); +_LIBCPP_SPECIALIZE_VEC_EXT_32(char16_t); +_LIBCPP_SPECIALIZE_VEC_EXT_32(char32_t); +_LIBCPP_SPECIALIZE_VEC_EXT_32(wchar_t); +_LIBCPP_SPECIALIZE_VEC_EXT_32(signed char); +_LIBCPP_SPECIALIZE_VEC_EXT_32(signed short); +_LIBCPP_SPECIALIZE_VEC_EXT_32(signed int); +_LIBCPP_SPECIALIZE_VEC_EXT_32(signed long); +_LIBCPP_SPECIALIZE_VEC_EXT_32(signed long long); +_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned char); +_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned short); +_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned int); +_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned long); +_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned long long); +_LIBCPP_SPECIALIZE_VEC_EXT_32(float); 
+_LIBCPP_SPECIALIZE_VEC_EXT_32(double); +_LIBCPP_SPECIALIZE_VEC_EXT_32(long double); + +#undef _LIBCPP_SPECIALIZE_VEC_EXT_32 +#undef _LIBCPP_SPECIALIZE_VEC_EXT +#endif + +template +class __simd_storage<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>> { + using _StorageType = + typename __vec_ext_traits<_Tp, sizeof(_Tp) * __num_element>::type; + + _StorageType __storage_; + + template + friend struct simd; + + template + friend struct simd_mask; + +public: + _Tp __get(size_t __index) const noexcept { return __storage_[__index]; }; + void __set(size_t __index, _Tp __val) noexcept { + __storage_[__index] = __val; + } +}; + +#endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION + +template +class __simd_reference { + static_assert(std::is_same<_Vp, _Tp>::value, ""); + + template + friend struct simd; + + template + friend struct simd_mask; + + __simd_storage<_Tp, _Abi>* __ptr_; + size_t __index_; + + __simd_reference(__simd_storage<_Tp, _Abi>* __ptr, size_t __index) + : __ptr_(__ptr), __index_(__index) {} + + __simd_reference(const __simd_reference&) = default; + +public: + __simd_reference() = delete; + __simd_reference& operator=(const __simd_reference&) = delete; + + operator _Vp() const { return __ptr_->__get(__index_); } + + __simd_reference operator=(_Vp __value) && { + __ptr_->__set(__index_, __value); + return *this; + } + + __simd_reference operator++() && { + return std::move(*this) = __ptr_->__get(__index_) + 1; + } + + _Vp operator++(int) && { + auto __val = __ptr_->__get(__index_); + __ptr_->__set(__index_, __val + 1); + return __val; + } + + __simd_reference operator--() && { + return std::move(*this) = __ptr_->__get(__index_) - 1; + } + + _Vp operator--(int) && { + auto __val = __ptr_->__get(__index_); + __ptr_->__set(__index_, __val - 1); + return __val; + } + + __simd_reference operator+=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) + __value; + } + + __simd_reference operator-=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) - __value; + } + + __simd_reference operator*=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) * __value; + } + + __simd_reference operator/=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) / __value; + } + + __simd_reference operator%=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) % __value; + } + + __simd_reference operator>>=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) >> __value; + } + + __simd_reference operator<<=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) << __value; + } + + __simd_reference operator&=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) & __value; + } + + __simd_reference operator|=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) | __value; + } + + __simd_reference operator^=(_Vp __value) && { + return std::move(*this) = __ptr_->__get(__index_) ^ __value; + } +}; + +template +constexpr decltype(_To{std::declval<_From>()}, true) +__is_non_narrowing_convertible_impl(_From) { + return true; +} + +template +constexpr bool __is_non_narrowing_convertible_impl(...) 
+{
+  return false;
+}
+
+template <class _To, class _From>
+constexpr typename std::enable_if<std::is_arithmetic<_To>::value &&
+                                      std::is_arithmetic<_From>::value,
+                                  bool>::type
+__is_non_narrowing_arithmetic_convertible() {
+  return __is_non_narrowing_convertible_impl<_To>(_From{});
+}
+
+template <class _To, class _From>
+constexpr typename std::enable_if<!(std::is_arithmetic<_To>::value &&
+                                    std::is_arithmetic<_From>::value),
+                                  bool>::type
+__is_non_narrowing_arithmetic_convertible() {
+  return false;
+}
+
+template <class _Tp>
+constexpr _Tp __variadic_sum() {
+  return _Tp{};
+}
+
+template <class _Tp, class _Up, class... _Args>
+constexpr _Tp __variadic_sum(_Up __first, _Args... __rest) {
+  return static_cast<_Tp>(__first) + __variadic_sum<_Tp>(__rest...);
+}
+
+template <class _Tp>
+struct __nodeduce {
+  using type = _Tp;
+};
+
+template <class _Tp>
+constexpr bool __vectorizable() {
+  return std::is_arithmetic<_Tp>::value && !std::is_const<_Tp>::value &&
+         !std::is_volatile<_Tp>::value && !std::is_same<_Tp, bool>::value;
+}
+
+_LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD
+_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD_ABI
+
+using scalar = __simd_abi<_StorageKind::_Scalar, 1>;
+
+template <int _Np>
+using fixed_size = __simd_abi<_StorageKind::_Array, _Np>;
+
+template <class _Tp>
+_LIBCPP_INLINE_VAR constexpr size_t max_fixed_size = 32;
+
+template <class _Tp>
+using compatible = fixed_size<16 / sizeof(_Tp)>;
+
+#ifndef _LIBCPP_HAS_NO_VECTOR_EXTENSION
+template <class _Tp>
+using native = __simd_abi<_StorageKind::_VecExt,
+                          _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
+#else
+template <class _Tp>
+using native =
+    fixed_size<_Tp, _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
+#endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION
+
+_LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD_ABI
+_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD
+
+template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
+class simd;
+template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
+class simd_mask;
+
+struct element_aligned_tag {};
+struct vector_aligned_tag {};
+template <size_t>
+struct overaligned_tag {};
+_LIBCPP_INLINE_VAR constexpr element_aligned_tag element_aligned{};
+_LIBCPP_INLINE_VAR constexpr vector_aligned_tag vector_aligned{};
+template <size_t _Np>
+_LIBCPP_INLINE_VAR constexpr overaligned_tag<_Np> overaligned{};
+
+// traits [simd.traits]
+template <class _Tp>
+struct is_abi_tag : std::integral_constant<bool, false> {};
+
+template <_StorageKind __kind, int _Np>
+struct is_abi_tag<__simd_abi<__kind, _Np>>
+    : std::integral_constant<bool, true> {};
+
+template <class _Tp>
+struct is_simd : std::integral_constant<bool, false> {};
+
+template <class _Tp, class _Abi>
+struct is_simd<simd<_Tp, _Abi>> : std::integral_constant<bool, true> {};
+
+template <class _Tp>
+struct is_simd_mask : std::integral_constant<bool, false> {};
+
+template <class _Tp, class _Abi>
+struct is_simd_mask<simd_mask<_Tp, _Abi>> : std::integral_constant<bool, true> {
+};
+
+template <class _Tp>
+struct is_simd_flag_type : std::integral_constant<bool, false> {};
+
+template <>
+struct is_simd_flag_type<element_aligned_tag>
+    : std::integral_constant<bool, true> {};
+
+template <>
+struct is_simd_flag_type<vector_aligned_tag>
+    : std::integral_constant<bool, true> {};
+
+template <size_t _Align>
+struct is_simd_flag_type<overaligned_tag<_Align>>
+    : std::integral_constant<bool, true> {};
+
+template <class _Tp>
+_LIBCPP_INLINE_VAR constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
+template <class _Tp>
+_LIBCPP_INLINE_VAR constexpr bool is_simd_v = is_simd<_Tp>::value;
+template <class _Tp>
+_LIBCPP_INLINE_VAR constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
+template <class _Tp>
+_LIBCPP_INLINE_VAR constexpr bool is_simd_flag_type_v =
+    is_simd_flag_type<_Tp>::value;
+template <class _Tp, size_t _Np>
+struct abi_for_size {
+  using type = simd_abi::fixed_size<_Np>;
+};
+template <class _Tp, size_t _Np>
+using abi_for_size_t = typename abi_for_size<_Tp, _Np>::type;
+
+template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
+struct simd_size;
+
+template <class _Tp, _StorageKind __kind, int _Np>
+struct simd_size<_Tp, __simd_abi<__kind, _Np>>
+    : std::integral_constant<size_t, _Np> {
+  static_assert(
+      std::is_arithmetic<_Tp>::value &&
+          !std::is_same<typename std::remove_const<_Tp>::type, bool>::value,
+      "Element type should be vectorizable");
+};
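The ABI tags and traits above drive all dispatch in this header: `simd_abi::fixed_size<N>` always stores `N` lanes in a `std::array`, while `simd_abi::native<T>` selects vector-extension storage when the compiler supports it. A small sketch of how the traits compose; the `stdx` alias is an assumption (`parallelism_v2` is an inline namespace, so it can be omitted):

```cpp
#include <experimental/simd>
namespace stdx = std::experimental;

// fixed_size<8> holds exactly 8 lanes regardless of hardware.
static_assert(
    stdx::simd_size<float, stdx::simd_abi::fixed_size<8>>::value == 8, "");
// The default ABI is compatible<T> = fixed_size<16 / sizeof(T)>, i.e. 4 floats.
static_assert(stdx::simd_size<float>::value == 4, "");
// Trait queries follow the usual type_traits idiom.
static_assert(stdx::is_abi_tag_v<stdx::simd_abi::scalar>, "");
static_assert(stdx::is_simd_flag_type_v<stdx::element_aligned_tag>, "");
```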
+// TODO: implement it.
+template <class _Tp, class _Up = typename _Tp::value_type>
+struct memory_alignment;
+
+template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
+_LIBCPP_INLINE_VAR constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
+
+template <class _Tp, class _Up = typename _Tp::value_type>
+_LIBCPP_INLINE_VAR constexpr size_t memory_alignment_v =
+    memory_alignment<_Tp, _Up>::value;
+
+// class template simd [simd.class]
+template <class _Tp>
+using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
+template <class _Tp, int _Np>
+using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
+
+// class template simd_mask [simd.mask.class]
+template <class _Tp>
+using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
+
+template <class _Tp, int _Np>
+using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
+
+// casts [simd.casts]
+template <class _Tp>
+struct __static_simd_cast_traits {
+  template <class _Up, class _Abi>
+  static simd<_Tp, _Abi> __apply(const simd<_Up, _Abi>& __v);
+};
+
+template <class _Tp, class _NewAbi>
+struct __static_simd_cast_traits<simd<_Tp, _NewAbi>> {
+  template <class _Up, class _Abi>
+  static typename std::enable_if<simd<_Up, _Abi>::size() ==
+                                     simd<_Tp, _NewAbi>::size(),
+                                 simd<_Tp, _NewAbi>>::type
+  __apply(const simd<_Up, _Abi>& __v);
+};
+
+template <class _Tp>
+struct __simd_cast_traits {
+  template <class _Up, class _Abi>
+  static typename std::enable_if<
+      __is_non_narrowing_arithmetic_convertible<_Up, _Tp>(),
+      simd<_Tp, _Abi>>::type
+  __apply(const simd<_Up, _Abi>& __v);
+};
+
+template <class _Tp, class _NewAbi>
+struct __simd_cast_traits<simd<_Tp, _NewAbi>> {
+  template <class _Up, class _Abi>
+  static typename std::enable_if<
+      __is_non_narrowing_arithmetic_convertible<_Up, _Tp>() &&
+          simd<_Up, _Abi>::size() == simd<_Tp, _NewAbi>::size(),
+      simd<_Tp, _NewAbi>>::type
+  __apply(const simd<_Up, _Abi>& __v);
+};
+
+template <class _Tp, class _Up, class _Abi>
+auto simd_cast(const simd<_Up, _Abi>& __v)
+    -> decltype(__simd_cast_traits<_Tp>::__apply(__v)) {
+  return __simd_cast_traits<_Tp>::__apply(__v);
+}
+
+template <class _Tp, class _Up, class _Abi>
+auto static_simd_cast(const simd<_Up, _Abi>& __v)
+    -> decltype(__static_simd_cast_traits<_Tp>::__apply(__v)) {
+  return __static_simd_cast_traits<_Tp>::__apply(__v);
+}
+
+template <class _Tp, class _Abi>
+fixed_size_simd<_Tp, simd_size<_Tp, _Abi>::value>
+to_fixed_size(const simd<_Tp, _Abi>&) noexcept;
+
+template <class _Tp, class _Abi>
+fixed_size_simd_mask<_Tp, simd_size<_Tp, _Abi>::value>
+to_fixed_size(const simd_mask<_Tp, _Abi>&) noexcept;
+
+template <class _Tp, int _Np>
+native_simd<_Tp> to_native(const fixed_size_simd<_Tp, _Np>&) noexcept;
+
+template <class _Tp, int _Np>
+native_simd_mask<_Tp> to_native(const fixed_size_simd_mask<_Tp, _Np>&) noexcept;
+
+template <class _Tp, int _Np>
+simd<_Tp> to_compatible(const fixed_size_simd<_Tp, _Np>&) noexcept;
+
+template <class _Tp, int _Np>
+simd_mask<_Tp> to_compatible(const fixed_size_simd_mask<_Tp, _Np>&) noexcept;
+
+template <size_t... _Sizes, class _Tp, class _Abi>
+tuple<simd<_Tp, abi_for_size_t<_Tp, _Sizes>>...> split(const simd<_Tp, _Abi>&);
+
+template <size_t... _Sizes, class _Tp, class _Abi>
+tuple<simd_mask<_Tp, abi_for_size_t<_Tp, _Sizes>>...>
+split(const simd_mask<_Tp, _Abi>&);
+
+template <class _SimdType, class _Abi>
+array<_SimdType, simd_size<typename _SimdType::value_type, _Abi>::value /
+                     _SimdType::size()>
+split(const simd<typename _SimdType::value_type, _Abi>&);
+
+template <class _SimdType, class _Abi>
+array<_SimdType, simd_size<typename _SimdType::value_type, _Abi>::value /
+                     _SimdType::size()>
+split(const simd_mask<typename _SimdType::value_type, _Abi>&);
+
+template <class _Tp, class... _Abis>
+simd<_Tp, abi_for_size_t<_Tp, __variadic_sum<size_t>(simd_size<_Tp, _Abis>::value...)>>
+concat(const simd<_Tp, _Abis>&...);
+
+template <class _Tp, class... _Abis>
+simd_mask<_Tp,
+          abi_for_size_t<_Tp, __variadic_sum<size_t>(simd_size<_Tp, _Abis>::value...)>>
+concat(const simd_mask<_Tp, _Abis>&...);
+
+// reductions [simd.mask.reductions]
+template <class _Tp, class _Abi>
+bool all_of(const simd_mask<_Tp, _Abi>&) noexcept;
+template <class _Tp, class _Abi>
+bool any_of(const simd_mask<_Tp, _Abi>&) noexcept;
+template <class _Tp, class _Abi>
+bool none_of(const simd_mask<_Tp, _Abi>&) noexcept;
+template <class _Tp, class _Abi>
+bool some_of(const simd_mask<_Tp, _Abi>&) noexcept;
+template <class _Tp, class _Abi>
+int popcount(const simd_mask<_Tp, _Abi>&) noexcept;
+template <class _Tp, class _Abi>
+int find_first_set(const simd_mask<_Tp, _Abi>&);
+template <class _Tp, class _Abi>
+int find_last_set(const simd_mask<_Tp, _Abi>&);
+bool all_of(bool) noexcept;
+bool any_of(bool) noexcept;
+bool none_of(bool) noexcept;
+bool some_of(bool) noexcept;
+int popcount(bool) noexcept;
+int find_first_set(bool) noexcept;
+int find_last_set(bool) noexcept;
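The mask reductions above answer the usual "did any lane trip?" questions without leaving SIMD code. A short usage sketch, assuming the TODO-marked pieces of this header are implemented (`any_negative` is an illustrative name):

```cpp
#include <experimental/simd>
namespace stdx = std::experimental;

bool any_negative(const stdx::fixed_size_simd<int, 4>& v) {
  // Comparing two simd values yields a simd_mask with one bool per lane.
  const auto m = v < stdx::fixed_size_simd<int, 4>(0);
  // all_of/any_of/none_of collapse a mask to a single bool; popcount
  // counts the true lanes; find_first_set returns the lowest true index.
  return stdx::any_of(m);
}
```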
+// masked assignment [simd.whereexpr]
+template <class _MaskType, class _Tp>
+class const_where_expression;
+template <class _MaskType, class _Tp>
+class where_expression;
+
+// masked assignment [simd.mask.where]
+template <class _Tp, class _Abi>
+where_expression<simd_mask<_Tp, _Abi>, simd<_Tp, _Abi>>
+where(const typename simd<_Tp, _Abi>::mask_type&, simd<_Tp, _Abi>&) noexcept;
+
+template <class _Tp, class _Abi>
+const_where_expression<simd_mask<_Tp, _Abi>, const simd<_Tp, _Abi>>
+where(const typename simd<_Tp, _Abi>::mask_type&,
+      const simd<_Tp, _Abi>&) noexcept;
+
+template <class _Tp, class _Abi>
+where_expression<simd_mask<_Tp, _Abi>, simd_mask<_Tp, _Abi>>
+where(const typename __nodeduce<simd_mask<_Tp, _Abi>>::type&,
+      simd_mask<_Tp, _Abi>&) noexcept;
+
+template <class _Tp, class _Abi>
+const_where_expression<simd_mask<_Tp, _Abi>, const simd_mask<_Tp, _Abi>>
+where(const typename __nodeduce<simd_mask<_Tp, _Abi>>::type&,
+      const simd_mask<_Tp, _Abi>&) noexcept;
+
+template <class _Tp>
+where_expression<bool, _Tp> where(bool, _Tp&) noexcept;
+
+template <class _Tp>
+const_where_expression<bool, const _Tp> where(bool, const _Tp&) noexcept;
+
+// reductions [simd.reductions]
+template <class _Tp, class _Abi, class _BinaryOp = std::plus<_Tp>>
+_Tp reduce(const simd<_Tp, _Abi>&, _BinaryOp = _BinaryOp());
+
+template <class _MaskType, class _SimdType, class _BinaryOp>
+typename _SimdType::value_type
+reduce(const const_where_expression<_MaskType, _SimdType>&,
+       typename _SimdType::value_type neutral_element, _BinaryOp binary_op);
+
+template <class _MaskType, class _SimdType>
+typename _SimdType::value_type
+reduce(const const_where_expression<_MaskType, _SimdType>&,
+       plus<typename _SimdType::value_type> binary_op = {});
+
+template <class _MaskType, class _SimdType>
+typename _SimdType::value_type
+reduce(const const_where_expression<_MaskType, _SimdType>&,
+       multiplies<typename _SimdType::value_type> binary_op);
+
+template <class _MaskType, class _SimdType>
+typename _SimdType::value_type
+reduce(const const_where_expression<_MaskType, _SimdType>&,
+       bit_and<typename _SimdType::value_type> binary_op);
+
+template <class _MaskType, class _SimdType>
+typename _SimdType::value_type
+reduce(const const_where_expression<_MaskType, _SimdType>&,
+       bit_or<typename _SimdType::value_type> binary_op);
+
+template <class _MaskType, class _SimdType>
+typename _SimdType::value_type
+reduce(const const_where_expression<_MaskType, _SimdType>&,
+       bit_xor<typename _SimdType::value_type> binary_op);
+
+template <class _Tp, class _Abi>
+_Tp hmin(const simd<_Tp, _Abi>&);
+template <class _MaskType, class _SimdType>
+typename _SimdType::value_type
+hmin(const const_where_expression<_MaskType, _SimdType>&);
+template <class _Tp, class _Abi>
+_Tp hmax(const simd<_Tp, _Abi>&);
+template <class _MaskType, class _SimdType>
+typename _SimdType::value_type
+hmax(const const_where_expression<_MaskType, _SimdType>&);
+
+// algorithms [simd.alg]
+template <class _Tp, class _Abi>
+simd<_Tp, _Abi> min(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;
+
+template <class _Tp, class _Abi>
+simd<_Tp, _Abi> max(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;
+
+template <class _Tp, class _Abi>
+std::pair<simd<_Tp, _Abi>, simd<_Tp, _Abi>>
+minmax(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;
+
+template <class _Tp, class _Abi>
+simd<_Tp, _Abi> clamp(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&,
+                      const simd<_Tp, _Abi>&);
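`where()` is the TS replacement for per-lane branching: it couples a mask with a vector, and assignments through the returned `where_expression` touch only the selected lanes. A minimal usage sketch of the declarations above (the `clamp_negatives` helper is illustrative, and this header still marks the expression classes as TODO):

```cpp
#include <experimental/simd>
namespace stdx = std::experimental;

// Zero out the negative lanes of v, leaving the other lanes untouched.
void clamp_negatives(stdx::native_simd<float>& v) {
  const auto neg = v < stdx::native_simd<float>(0.0f);  // per-lane mask
  stdx::where(neg, v) = 0.0f;  // masked assignment: only lanes where neg is true
}
```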
+// [simd.whereexpr]
+// TODO implement where expressions.
+template <class _MaskType, class _Tp>
+class const_where_expression {
+public:
+  const_where_expression(const const_where_expression&) = delete;
+  const_where_expression& operator=(const const_where_expression&) = delete;
+  typename remove_const<_Tp>::type operator-() const&&;
+  template <class _Up, class _Flags>
+  void copy_to(_Up*, _Flags) const&&;
+};
+
+template <class _MaskType, class _Tp>
+class where_expression : public const_where_expression<_MaskType, _Tp> {
+public:
+  where_expression(const where_expression&) = delete;
+  where_expression& operator=(const where_expression&) = delete;
+  template <class _Up>
+  void operator=(_Up&&);
+  template <class _Up>
+  void operator+=(_Up&&);
+  template <class _Up>
+  void operator-=(_Up&&);
+  template <class _Up>
+  void operator*=(_Up&&);
+  template <class _Up>
+  void operator/=(_Up&&);
+  template <class _Up>
+  void operator%=(_Up&&);
+  template <class _Up>
+  void operator&=(_Up&&);
+  template <class _Up>
+  void operator|=(_Up&&);
+  template <class _Up>
+  void operator^=(_Up&&);
+  template <class _Up>
+  void operator<<=(_Up&&);
+  template <class _Up>
+  void operator>>=(_Up&&);
+  void operator++();
+  void operator++(int);
+  void operator--();
+  void operator--(int);
+  template <class _Up, class _Flags>
+  void copy_from(const _Up*, _Flags);
+};
+
+// [simd.class]
+// TODO: implement simd
+template <class _Tp, class _Abi>
+class simd {
+public:
+  using value_type = _Tp;
+  using reference = __simd_reference<_Tp, _Tp, _Abi>;
+  using mask_type = simd_mask<_Tp, _Abi>;
+  using abi_type = _Abi;
+
+  simd() = default;
+  simd(const simd&) = default;
+  simd& operator=(const simd&) = default;
+
+  static constexpr size_t size() noexcept {
+    return simd_size<_Tp, _Abi>::value;
+  }
+
+private:
+  __simd_storage<_Tp, _Abi> __s_;
+
+  template <class _Up>
+  static constexpr bool __can_broadcast() {
+    return (std::is_arithmetic<_Up>::value &&
+            __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()) ||
+           (!std::is_arithmetic<_Up>::value &&
+            std::is_convertible<_Up, _Tp>::value) ||
+           std::is_same<typename std::remove_const<_Up>::type, int>::value ||
+           (std::is_same<typename std::remove_const<_Up>::type,
+                         unsigned int>::value &&
+            std::is_unsigned<_Tp>::value);
+  }
+
+  template <class _Generator, size_t... __indicies>
+  static constexpr decltype(
+      std::forward_as_tuple(std::declval<_Generator>()(
+          std::integral_constant<size_t, __indicies>())...),
+      bool())
+  __can_generate(std::index_sequence<__indicies...>) {
+    return !__variadic_sum<bool>(
+        !__can_broadcast<decltype(std::declval<_Generator>()(
+            std::integral_constant<size_t, __indicies>()))>()...);
+  }
+
+  template <class _Generator>
+  static bool __can_generate(...) {
+    return false;
+  }
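+  // Note for readers: __can_broadcast and __can_generate above are
+  // constexpr feasibility checks. The decltype(...) return type of the
+  // first __can_generate overload is only well-formed when the generator
+  // is invocable with integral_constant<size_t, i> for every lane i, so
+  // overload resolution silently falls back to the variadic overload
+  // (which reports false) instead of producing a hard error.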
+  template <class _Generator, size_t... __indicies>
+  void __generator_init(_Generator&& __g, std::index_sequence<__indicies...>) {
+    int __not_used[]{((*this)[__indicies] =
+                          __g(std::integral_constant<size_t, __indicies>()),
+                      0)...};
+    (void)__not_used;
+  }
+
+public:
+  // implicit type conversion constructor
+  template <class _Up,
+            class = typename std::enable_if<
+                std::is_same<_Abi, simd_abi::fixed_size<size()>>::value &&
+                __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()>::type>
+  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __v) {
+    for (size_t __i = 0; __i < size(); __i++) {
+      (*this)[__i] = static_cast<_Tp>(__v[__i]);
+    }
+  }
+
+  // implicit broadcast constructor
+  template <class _Up,
+            class = typename std::enable_if<__can_broadcast<_Up>()>::type>
+  simd(_Up&& __rv) {
+    auto __v = static_cast<_Tp>(__rv);
+    for (size_t __i = 0; __i < size(); __i++) {
+      (*this)[__i] = __v;
+    }
+  }
+
+  // generator constructor
+  template <class _Generator,
+            int = typename std::enable_if<
+                __can_generate<_Generator>(std::make_index_sequence<size()>()),
+                int>::type()>
+  explicit simd(_Generator&& __g) {
+    __generator_init(std::forward<_Generator>(__g),
+                     std::make_index_sequence<size()>());
+  }
+
+  // load constructor
+  template <
+      class _Up, class _Flags,
+      class = typename std::enable_if<__vectorizable<_Up>()>::type,
+      class = typename std::enable_if<is_simd_flag_type<_Flags>::value>::type>
+  simd(const _Up* __buffer, _Flags) {
+    // TODO: optimize for overaligned flags
+    for (size_t __i = 0; __i < size(); __i++) {
+      (*this)[__i] = static_cast<_Tp>(__buffer[__i]);
+    }
+  }
+
+  // loads [simd.load]
+  template <class _Up, class _Flags>
+  typename std::enable_if<__vectorizable<_Up>() &&
+                          is_simd_flag_type<_Flags>::value>::type
+  copy_from(const _Up* __buffer, _Flags) {
+    *this = simd(__buffer, _Flags());
+  }
+
+  // stores [simd.store]
+  template <class _Up, class _Flags>
+  typename std::enable_if<__vectorizable<_Up>() &&
+                          is_simd_flag_type<_Flags>::value>::type
+  copy_to(_Up* __buffer, _Flags) const {
+    // TODO: optimize for overaligned flags
+    for (size_t __i = 0; __i < size(); __i++) {
+      __buffer[__i] = static_cast<_Up>((*this)[__i]);
+    }
+  }
+
+  // scalar access [simd.subscr]
+  reference operator[](size_t __i) { return reference(&__s_, __i); }
+
+  value_type operator[](size_t __i) const { return __s_.__get(__i); }
+
+  // unary operators [simd.unary]
+  simd& operator++();
+  simd operator++(int);
+  simd& operator--();
+  simd operator--(int);
+  mask_type operator!() const;
+  simd operator~() const;
+  simd operator+() const;
+  simd operator-() const;
+
+  // binary operators [simd.binary]
+  friend simd operator+(const simd&, const simd&);
+  friend simd operator-(const simd&, const simd&);
+  friend simd operator*(const simd&, const simd&);
+  friend simd operator/(const simd&, const simd&);
+  friend simd operator%(const simd&, const simd&);
+  friend simd operator&(const simd&, const simd&);
+  friend simd operator|(const simd&, const simd&);
+  friend simd operator^(const simd&, const simd&);
+  friend simd operator<<(const simd&, const simd&);
+  friend simd operator>>(const simd&, const simd&);
+  friend simd operator<<(const simd&, int);
+  friend simd operator>>(const simd&, int);
+
+  // compound assignment [simd.cassign]
+  friend simd& operator+=(simd&, const simd&);
+  friend simd& operator-=(simd&, const simd&);
+  friend simd& operator*=(simd&, const simd&);
+  friend simd& operator/=(simd&, const simd&);
+  friend simd& operator%=(simd&, const simd&);
+
+  friend simd& operator&=(simd&, const simd&);
+  friend simd& operator|=(simd&, const simd&);
+  friend simd& operator^=(simd&, const simd&);
+  friend simd& operator<<=(simd&, const simd&);
+  friend simd& operator>>=(simd&, const simd&);
+  friend simd& operator<<=(simd&, int);
+  friend simd& operator>>=(simd&, int);
+
+  // compares [simd.comparison]
+  friend mask_type operator==(const simd&, const simd&);
+  friend mask_type operator!=(const simd&, const simd&);
+  friend mask_type operator>=(const simd&, const simd&);
+  friend mask_type operator<=(const simd&, const simd&);
+  friend mask_type operator>(const simd&, const simd&);
+  friend mask_type operator<(const simd&, const simd&);
+};
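Taken together, the constructors and friend operators above already fix the user-facing shape of `simd`, even though most operator bodies remain TODO in this header. A sketch of the intended element-wise style, assuming a finished implementation (the `sum_scaled` name and full-vector trip count are illustrative; a real loop would also need a scalar tail):

```cpp
#include <cstddef>
#include <experimental/simd>
namespace stdx = std::experimental;

// Dot product over n_vec full native-width vectors.
float sum_scaled(const float* a, const float* b, std::size_t n_vec) {
  using V = stdx::native_simd<float>;
  V acc(0.0f);                                      // broadcast constructor
  for (std::size_t i = 0; i < n_vec; ++i) {
    V x(a + i * V::size(), stdx::element_aligned);  // load constructor
    V y(b + i * V::size(), stdx::element_aligned);
    acc += x * y;                                   // lane-wise operators
  }
  return stdx::reduce(acc);                         // horizontal sum
}
```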
+// [simd.mask.class]
+template <class _Tp, class _Abi>
+// TODO: implement simd_mask
+class simd_mask {
+public:
+  using value_type = bool;
+  // TODO: this is strawman implementation. Turn it into a proxy type.
+  using reference = bool&;
+  using simd_type = simd<_Tp, _Abi>;
+  using abi_type = _Abi;
+  static constexpr size_t size() noexcept;
+  simd_mask() = default;
+
+  // broadcast constructor
+  explicit simd_mask(value_type) noexcept;
+
+  // implicit type conversion constructor
+  template <class _Up>
+  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>&) noexcept;
+
+  // load constructor
+  template <class _Flags>
+  simd_mask(const value_type*, _Flags);
+
+  // loads [simd.mask.copy]
+  template <class _Flags>
+  void copy_from(const value_type*, _Flags);
+  template <class _Flags>
+  void copy_to(value_type*, _Flags) const;
+
+  // scalar access [simd.mask.subscr]
+  reference operator[](size_t);
+  value_type operator[](size_t) const;
+
+  // unary operators [simd.mask.unary]
+  simd_mask operator!() const noexcept;
+
+  // simd_mask binary operators [simd.mask.binary]
+  friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept;
+  friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept;
+  friend simd_mask operator&(const simd_mask&, const simd_mask&) noexcept;
+  friend simd_mask operator|(const simd_mask&, const simd_mask&) noexcept;
+  friend simd_mask operator^(const simd_mask&, const simd_mask&) noexcept;
+
+  // simd_mask compound assignment [simd.mask.cassign]
+  friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept;
+  friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept;
+  friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept;
+
+  // simd_mask compares [simd.mask.comparison]
+  friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept;
+  friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept;
+};
+
+#endif // _LIBCPP_STD_VER >= 17
+
+_LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD
+
+#endif /* _LIBCPP_EXPERIMENTAL_SIMD */
diff --git a/include/c++/v1/filesystem b/include/c++/v1/filesystem
new file mode 100644
index 0000000..339bb25
--- /dev/null
+++ b/include/c++/v1/filesystem
@@ -0,0 +1,2679 @@
+// -*- C++ -*-
+//===--------------------------- filesystem -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP_FILESYSTEM
+#define _LIBCPP_FILESYSTEM
+/*
+    filesystem synopsis
+
+    namespace std { namespace filesystem {
+
+    class path;
+
+    void swap(path& lhs, path& rhs) noexcept;
+    size_t hash_value(const path& p) noexcept;
+
+    bool operator==(const path& lhs, const path& rhs) noexcept;
+    bool operator!=(const path& lhs, const path& rhs) noexcept;
+    bool operator< (const path& lhs, const path& rhs) noexcept;
+    bool operator<=(const path& lhs, const path& rhs) noexcept;
+    bool operator> (const path& lhs, const path& rhs) noexcept;
+    bool operator>=(const path& lhs, const path& rhs) noexcept;
+
+    path operator/ (const path& lhs, const path& rhs);
+
+    // fs.path.io operators are friends of path.
+ template + friend basic_ostream& + operator<<(basic_ostream& os, const path& p); + + template + friend basic_istream& + operator>>(basic_istream& is, path& p); + + template + path u8path(const Source& source); + template + path u8path(InputIterator first, InputIterator last); + + class filesystem_error; + class directory_entry; + + class directory_iterator; + + // enable directory_iterator range-based for statements + directory_iterator begin(directory_iterator iter) noexcept; + directory_iterator end(const directory_iterator&) noexcept; + + class recursive_directory_iterator; + + // enable recursive_directory_iterator range-based for statements + recursive_directory_iterator begin(recursive_directory_iterator iter) noexcept; + recursive_directory_iterator end(const recursive_directory_iterator&) noexcept; + + class file_status; + + struct space_info + { + uintmax_t capacity; + uintmax_t free; + uintmax_t available; + }; + + enum class file_type; + enum class perms; + enum class perm_options; + enum class copy_options; + enum class directory_options; + + typedef chrono::time_point file_time_type; + + // operational functions + + path absolute(const path& p); + path absolute(const path& p, error_code &ec); + + path canonical(const path& p); + path canonical(const path& p, error_code& ec); + + void copy(const path& from, const path& to); + void copy(const path& from, const path& to, error_code& ec); + void copy(const path& from, const path& to, copy_options options); + void copy(const path& from, const path& to, copy_options options, + error_code& ec); + + bool copy_file(const path& from, const path& to); + bool copy_file(const path& from, const path& to, error_code& ec); + bool copy_file(const path& from, const path& to, copy_options option); + bool copy_file(const path& from, const path& to, copy_options option, + error_code& ec); + + void copy_symlink(const path& existing_symlink, const path& new_symlink); + void copy_symlink(const path& existing_symlink, const path& new_symlink, + error_code& ec) noexcept; + + bool create_directories(const path& p); + bool create_directories(const path& p, error_code& ec); + + bool create_directory(const path& p); + bool create_directory(const path& p, error_code& ec) noexcept; + + bool create_directory(const path& p, const path& attributes); + bool create_directory(const path& p, const path& attributes, + error_code& ec) noexcept; + + void create_directory_symlink(const path& to, const path& new_symlink); + void create_directory_symlink(const path& to, const path& new_symlink, + error_code& ec) noexcept; + + void create_hard_link(const path& to, const path& new_hard_link); + void create_hard_link(const path& to, const path& new_hard_link, + error_code& ec) noexcept; + + void create_symlink(const path& to, const path& new_symlink); + void create_symlink(const path& to, const path& new_symlink, + error_code& ec) noexcept; + + path current_path(); + path current_path(error_code& ec); + void current_path(const path& p); + void current_path(const path& p, error_code& ec) noexcept; + + bool exists(file_status s) noexcept; + bool exists(const path& p); + bool exists(const path& p, error_code& ec) noexcept; + + bool equivalent(const path& p1, const path& p2); + bool equivalent(const path& p1, const path& p2, error_code& ec) noexcept; + + uintmax_t file_size(const path& p); + uintmax_t file_size(const path& p, error_code& ec) noexcept; + + uintmax_t hard_link_count(const path& p); + uintmax_t hard_link_count(const path& p, error_code& ec) noexcept; + + bool 
is_block_file(file_status s) noexcept; + bool is_block_file(const path& p); + bool is_block_file(const path& p, error_code& ec) noexcept; + + bool is_character_file(file_status s) noexcept; + bool is_character_file(const path& p); + bool is_character_file(const path& p, error_code& ec) noexcept; + + bool is_directory(file_status s) noexcept; + bool is_directory(const path& p); + bool is_directory(const path& p, error_code& ec) noexcept; + + bool is_empty(const path& p); + bool is_empty(const path& p, error_code& ec) noexcept; + + bool is_fifo(file_status s) noexcept; + bool is_fifo(const path& p); + bool is_fifo(const path& p, error_code& ec) noexcept; + + bool is_other(file_status s) noexcept; + bool is_other(const path& p); + bool is_other(const path& p, error_code& ec) noexcept; + + bool is_regular_file(file_status s) noexcept; + bool is_regular_file(const path& p); + bool is_regular_file(const path& p, error_code& ec) noexcept; + + bool is_socket(file_status s) noexcept; + bool is_socket(const path& p); + bool is_socket(const path& p, error_code& ec) noexcept; + + bool is_symlink(file_status s) noexcept; + bool is_symlink(const path& p); + bool is_symlink(const path& p, error_code& ec) noexcept; + + file_time_type last_write_time(const path& p); + file_time_type last_write_time(const path& p, error_code& ec) noexcept; + void last_write_time(const path& p, file_time_type new_time); + void last_write_time(const path& p, file_time_type new_time, + error_code& ec) noexcept; + + void permissions(const path& p, perms prms, + perm_options opts=perm_options::replace); + void permissions(const path& p, perms prms, error_code& ec) noexcept; + void permissions(const path& p, perms prms, perm_options opts, + error_code& ec); + + path proximate(const path& p, error_code& ec); + path proximate(const path& p, const path& base = current_path()); + path proximate(const path& p, const path& base, error_code &ec); + + path read_symlink(const path& p); + path read_symlink(const path& p, error_code& ec); + + path relative(const path& p, error_code& ec); + path relative(const path& p, const path& base=current_path()); + path relative(const path& p, const path& base, error_code& ec); + + bool remove(const path& p); + bool remove(const path& p, error_code& ec) noexcept; + + uintmax_t remove_all(const path& p); + uintmax_t remove_all(const path& p, error_code& ec); + + void rename(const path& from, const path& to); + void rename(const path& from, const path& to, error_code& ec) noexcept; + + void resize_file(const path& p, uintmax_t size); + void resize_file(const path& p, uintmax_t size, error_code& ec) noexcept; + + space_info space(const path& p); + space_info space(const path& p, error_code& ec) noexcept; + + file_status status(const path& p); + file_status status(const path& p, error_code& ec) noexcept; + + bool status_known(file_status s) noexcept; + + file_status symlink_status(const path& p); + file_status symlink_status(const path& p, error_code& ec) noexcept; + + path temp_directory_path(); + path temp_directory_path(error_code& ec); + + path weakly_canonical(path const& p); + path weakly_canonical(path const& p, error_code& ec); + + +} } // namespaces std::filesystem + +*/ + +#include <__config> +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // for quoted +#include +#include + +#include <__debug> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include 
<__undef_macros>
+
+#ifndef _LIBCPP_CXX03_LANG
+
+_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
+
+struct _FilesystemClock {
+#if !defined(_LIBCPP_HAS_NO_INT128)
+  typedef __int128_t rep;
+  typedef nano period;
+#else
+  typedef long long rep;
+  typedef nano period;
+#endif
+
+  typedef chrono::duration<rep, period> duration;
+  typedef chrono::time_point<_FilesystemClock> time_point;
+
+  static _LIBCPP_CONSTEXPR_AFTER_CXX11 const bool is_steady = false;
+
+  _LIBCPP_FUNC_VIS static time_point now() noexcept;
+
+  _LIBCPP_INLINE_VISIBILITY
+  static time_t to_time_t(const time_point& __t) noexcept {
+    typedef chrono::duration<rep> __secs;
+    return time_t(
+        chrono::duration_cast<__secs>(__t.time_since_epoch()).count());
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  static time_point from_time_t(time_t __t) noexcept {
+    typedef chrono::duration<rep> __secs;
+    return time_point(__secs(__t));
+  }
+};
+
+typedef chrono::time_point<_FilesystemClock> file_time_type;
+
+struct _LIBCPP_TYPE_VIS space_info {
+  uintmax_t capacity;
+  uintmax_t free;
+  uintmax_t available;
+};
+
+enum class _LIBCPP_ENUM_VIS file_type : signed char {
+  none = 0,
+  not_found = -1,
+  regular = 1,
+  directory = 2,
+  symlink = 3,
+  block = 4,
+  character = 5,
+  fifo = 6,
+  socket = 7,
+  unknown = 8
+};
+
+enum class _LIBCPP_ENUM_VIS perms : unsigned {
+  none = 0,
+
+  owner_read = 0400,
+  owner_write = 0200,
+  owner_exec = 0100,
+  owner_all = 0700,
+
+  group_read = 040,
+  group_write = 020,
+  group_exec = 010,
+  group_all = 070,
+
+  others_read = 04,
+  others_write = 02,
+  others_exec = 01,
+  others_all = 07,
+
+  all = 0777,
+
+  set_uid = 04000,
+  set_gid = 02000,
+  sticky_bit = 01000,
+  mask = 07777,
+  unknown = 0xFFFF,
+};
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator&(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) &
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator|(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) |
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator^(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) ^
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator~(perms _LHS) {
+  return static_cast<perms>(~static_cast<unsigned>(_LHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator&=(perms& _LHS, perms _RHS) { return _LHS = _LHS & _RHS; }
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator|=(perms& _LHS, perms _RHS) { return _LHS = _LHS | _RHS; }
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator^=(perms& _LHS, perms _RHS) { return _LHS = _LHS ^ _RHS; }
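The operator overloads above make `perms` a type-safe bit-mask mirroring POSIX mode bits: combine bits with `|`, test them with `&`, and compare against `perms::none`. A small sketch (the `make_owner_only` helper is illustrative, and it assumes a toolchain where this `<filesystem>` header is usable under C++17):

```cpp
#include <filesystem>
#include <system_error>
namespace fs = std::filesystem;

void make_owner_only(const fs::path& p, std::error_code& ec) {
  // Combine permission bits with |, exactly as with raw integers,
  // but without implicit conversions leaking through.
  const fs::perms pr = fs::perms::owner_read | fs::perms::owner_write;
  if ((pr & fs::perms::owner_read) != fs::perms::none)
    fs::permissions(p, pr, ec);  // perm_options::replace is the default
}
```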
+enum class _LIBCPP_ENUM_VIS perm_options : unsigned char {
+  replace = 1,
+  add = 2,
+  remove = 4,
+  nofollow = 8
+};
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator&(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned char>(_LHS) &
+                                   static_cast<unsigned char>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator|(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned char>(_LHS) |
+                                   static_cast<unsigned char>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator^(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned char>(_LHS) ^
+                                   static_cast<unsigned char>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator~(perm_options _LHS) {
+  return static_cast<perm_options>(~static_cast<unsigned char>(_LHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator&=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS & _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator|=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS | _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator^=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS ^ _RHS;
+}
+
+enum class _LIBCPP_ENUM_VIS copy_options : unsigned short {
+  none = 0,
+  skip_existing = 1,
+  overwrite_existing = 2,
+  update_existing = 4,
+  recursive = 8,
+  copy_symlinks = 16,
+  skip_symlinks = 32,
+  directories_only = 64,
+  create_symlinks = 128,
+  create_hard_links = 256,
+  __in_recursive_copy = 512,
+};
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr copy_options operator&(copy_options _LHS, copy_options _RHS) {
+  return static_cast<copy_options>(static_cast<unsigned short>(_LHS) &
+                                   static_cast<unsigned short>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr copy_options operator|(copy_options _LHS, copy_options _RHS) {
+  return static_cast<copy_options>(static_cast<unsigned short>(_LHS) |
+                                   static_cast<unsigned short>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr copy_options operator^(copy_options _LHS, copy_options _RHS) {
+  return static_cast<copy_options>(static_cast<unsigned short>(_LHS) ^
+                                   static_cast<unsigned short>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr copy_options operator~(copy_options _LHS) {
+  return static_cast<copy_options>(~static_cast<unsigned short>(_LHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline copy_options& operator&=(copy_options& _LHS, copy_options _RHS) {
+  return _LHS = _LHS & _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline copy_options& operator|=(copy_options& _LHS, copy_options _RHS) {
+  return _LHS = _LHS | _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline copy_options& operator^=(copy_options& _LHS, copy_options _RHS) {
+  return _LHS = _LHS ^ _RHS;
+}
+
+enum class _LIBCPP_ENUM_VIS directory_options : unsigned char {
+  none = 0,
+  follow_directory_symlink = 1,
+  skip_permission_denied = 2
+};
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr directory_options operator&(directory_options _LHS,
+                                             directory_options _RHS) {
+  return static_cast<directory_options>(static_cast<unsigned char>(_LHS) &
+                                        static_cast<unsigned char>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr directory_options operator|(directory_options _LHS,
+                                             directory_options _RHS) {
+  return static_cast<directory_options>(static_cast<unsigned char>(_LHS) |
+                                        static_cast<unsigned char>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr directory_options operator^(directory_options _LHS,
+                                             directory_options _RHS) {
+  return static_cast<directory_options>(static_cast<unsigned char>(_LHS) ^
+                                        static_cast<unsigned char>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr directory_options operator~(directory_options _LHS) {
+  return static_cast<directory_options>(~static_cast<unsigned char>(_LHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline directory_options& operator&=(directory_options& _LHS,
+                                     directory_options _RHS) {
+  return _LHS = _LHS & _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline directory_options& operator|=(directory_options& _LHS,
+                                     directory_options _RHS) {
+  return _LHS = _LHS | _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline directory_options& operator^=(directory_options& _LHS,
+                                     directory_options _RHS) {
+  return _LHS = _LHS ^ _RHS;
+}
+
+class _LIBCPP_TYPE_VIS file_status {
+public:
+  // constructors
+  _LIBCPP_INLINE_VISIBILITY
+  file_status() noexcept : file_status(file_type::none) {}
+  _LIBCPP_INLINE_VISIBILITY
+  explicit file_status(file_type __ft, perms __prms = perms::unknown) noexcept
+      : __ft_(__ft),
+        __prms_(__prms) {}
+
+  file_status(const file_status&) noexcept = default;
+  file_status(file_status&&) noexcept = default;
+
+  _LIBCPP_INLINE_VISIBILITY
+  ~file_status() {}
+
+  file_status& operator=(const file_status&) noexcept = default;
+  file_status& operator=(file_status&&) noexcept = default;
+
+  // observers
_LIBCPP_INLINE_VISIBILITY + file_type type() const noexcept { return __ft_; } + + _LIBCPP_INLINE_VISIBILITY + perms permissions() const noexcept { return __prms_; } + + // modifiers + _LIBCPP_INLINE_VISIBILITY + void type(file_type __ft) noexcept { __ft_ = __ft; } + + _LIBCPP_INLINE_VISIBILITY + void permissions(perms __p) noexcept { __prms_ = __p; } + +private: + file_type __ft_; + perms __prms_; +}; + +class _LIBCPP_TYPE_VIS directory_entry; + +template +struct __can_convert_char { + static const bool value = false; +}; +template +struct __can_convert_char : public __can_convert_char<_Tp> {}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char; +}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = wchar_t; +}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char16_t; +}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char32_t; +}; + +template +typename enable_if<__can_convert_char<_ECharT>::value, bool>::type +__is_separator(_ECharT __e) { + return __e == _ECharT('/'); +} + +struct _NullSentinal {}; + +template +using _Void = void; + +template +struct __is_pathable_string : public false_type {}; + +template +struct __is_pathable_string< + basic_string<_ECharT, _Traits, _Alloc>, + _Void::__char_type> > + : public __can_convert_char<_ECharT> { + using _Str = basic_string<_ECharT, _Traits, _Alloc>; + using _Base = __can_convert_char<_ECharT>; + static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } + static _ECharT const* __range_end(_Str const& __s) { + return __s.data() + __s.length(); + } + static _ECharT __first_or_null(_Str const& __s) { + return __s.empty() ? _ECharT{} : __s[0]; + } +}; + +template +struct __is_pathable_string< + basic_string_view<_ECharT, _Traits>, + _Void::__char_type> > + : public __can_convert_char<_ECharT> { + using _Str = basic_string_view<_ECharT, _Traits>; + using _Base = __can_convert_char<_ECharT>; + static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } + static _ECharT const* __range_end(_Str const& __s) { + return __s.data() + __s.length(); + } + static _ECharT __first_or_null(_Str const& __s) { + return __s.empty() ? 
_ECharT{} : __s[0];
+  }
+};
+
+template <class _Source,
+          class _DS = typename decay<_Source>::type,
+          class _UnqualPtrType =
+              typename remove_const<typename remove_pointer<_DS>::type>::type,
+          bool _IsCharPtr = is_pointer<_DS>::value&&
+              __can_convert_char<_UnqualPtrType>::value>
+struct __is_pathable_char_array : false_type {};
+
+template <class _Source, class _ECharT, class _UPtr>
+struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true>
+    : __can_convert_char<typename remove_const<_ECharT>::type> {
+  using _Base = __can_convert_char<typename remove_const<_ECharT>::type>;
+
+  static _ECharT const* __range_begin(const _ECharT* __b) { return __b; }
+  static _ECharT const* __range_end(const _ECharT* __b) {
+    using _Iter = const _ECharT*;
+    const _ECharT __sentinal = _ECharT{};
+    _Iter __e = __b;
+    for (; *__e != __sentinal; ++__e)
+      ;
+    return __e;
+  }
+
+  static _ECharT __first_or_null(const _ECharT* __b) { return *__b; }
+};
+
+template <class _Iter, bool _IsIt = __is_input_iterator<_Iter>::value,
+          class = void>
+struct __is_pathable_iter : false_type {};
+
+template <class _Iter>
+struct __is_pathable_iter<
+    _Iter, true,
+    _Void<typename __can_convert_char<
+        typename iterator_traits<_Iter>::value_type>::__char_type> >
+    : __can_convert_char<typename iterator_traits<_Iter>::value_type> {
+  using _ECharT = typename iterator_traits<_Iter>::value_type;
+  using _Base = __can_convert_char<_ECharT>;
+
+  static _Iter __range_begin(_Iter __b) { return __b; }
+  static _NullSentinal __range_end(_Iter) { return _NullSentinal{}; }
+
+  static _ECharT __first_or_null(_Iter __b) { return *__b; }
+};
+
+template <class _Tp, bool _IsStringT = __is_pathable_string<_Tp>::value,
+          bool _IsCharIterT = __is_pathable_char_array<_Tp>::value,
+          bool _IsIterT = !_IsCharIterT && __is_pathable_iter<_Tp>::value>
+struct __is_pathable : false_type {
+  static_assert(!_IsStringT && !_IsCharIterT && !_IsIterT, "Must all be false");
+};
+
+template <class _Tp>
+struct __is_pathable<_Tp, true, false, false> : __is_pathable_string<_Tp> {};
+
+template <class _Tp>
+struct __is_pathable<_Tp, false, true, false> : __is_pathable_char_array<_Tp> {
+};
+
+template <class _Tp>
+struct __is_pathable<_Tp, false, false, true> : __is_pathable_iter<_Tp> {};
+
+template <class _ECharT>
+struct _PathCVT {
+  static_assert(__can_convert_char<_ECharT>::value,
+                "Char type not convertible");
+
+  typedef __narrow_to_utf8<sizeof(_ECharT) * __CHAR_BIT__> _Narrower;
+
+  static void __append_range(string& __dest, _ECharT const* __b,
+                             _ECharT const* __e) {
+    _Narrower()(back_inserter(__dest), __b, __e);
+  }
+
+  template <class _Iter>
+  static void __append_range(string& __dest, _Iter __b, _Iter __e) {
+    static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload");
+    if (__b == __e)
+      return;
+    basic_string<_ECharT> __tmp(__b, __e);
+    _Narrower()(back_inserter(__dest), __tmp.data(),
+                __tmp.data() + __tmp.length());
+  }
+
+  template <class _Iter>
+  static void __append_range(string& __dest, _Iter __b, _NullSentinal) {
+    static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload");
+    const _ECharT __sentinal = _ECharT{};
+    if (*__b == __sentinal)
+      return;
+    basic_string<_ECharT> __tmp;
+    for (; *__b != __sentinal; ++__b)
+      __tmp.push_back(*__b);
+    _Narrower()(back_inserter(__dest), __tmp.data(),
+                __tmp.data() + __tmp.length());
+  }
+
+  template <class _Source>
+  static void __append_source(string& __dest, _Source const& __s) {
+    using _Traits = __is_pathable<_Source>;
+    __append_range(__dest, _Traits::__range_begin(__s),
+                   _Traits::__range_end(__s));
+  }
+};
+
+template <>
+struct _PathCVT<char> {
+
+  template <class _Iter>
+  static typename enable_if<__is_exactly_input_iterator<_Iter>::value>::type
+  __append_range(string& __dest, _Iter __b, _Iter __e) {
+    for (; __b != __e; ++__b)
+      __dest.push_back(*__b);
+  }
+
+  template <class _Iter>
+  static typename enable_if<__is_forward_iterator<_Iter>::value>::type
+  __append_range(string& __dest, _Iter __b, _Iter __e) {
+    __dest.__append_forward_unsafe(__b, __e);
+  }
+
+  template <class _Iter>
+  static void __append_range(string& __dest, _Iter __b, _NullSentinal) {
+    const char __sentinal = char{};
+    for (; *__b != __sentinal; ++__b)
+      __dest.push_back(*__b);
+  }
+
+  template <class _Source>
+  static void __append_source(string& __dest, _Source const& __s) {
+    using _Traits = __is_pathable<_Source>;
+    __append_range(__dest, _Traits::__range_begin(__s),
+                   _Traits::__range_end(__s));
+  }
+};
+
+class _LIBCPP_TYPE_VIS path {
+  template <class _SourceOrIter, class _Tp = path&>
+  using _EnableIfPathable =
+      typename enable_if<__is_pathable<_SourceOrIter>::value, _Tp>::type;
+
+  template <class _Tp>
+  using _SourceChar = typename __is_pathable<_Tp>::__char_type;
+
+  template <class _Tp>
+  using _SourceCVT = _PathCVT<_SourceChar<_Tp> >;
+
+public:
+  typedef char value_type;
+  typedef basic_string<value_type> string_type;
+  typedef _VSTD::string_view __string_view;
+  static constexpr value_type preferred_separator = '/';
+
+  enum class _LIBCPP_ENUM_VIS format : unsigned char {
+    auto_format,
+    native_format,
+    generic_format
+  };
+
+  // constructors and destructor
+  _LIBCPP_INLINE_VISIBILITY path() noexcept {}
+  _LIBCPP_INLINE_VISIBILITY path(const path& __p) : __pn_(__p.__pn_) {}
+  _LIBCPP_INLINE_VISIBILITY path(path&& __p) noexcept
+      : __pn_(_VSTD::move(__p.__pn_)) {}
+
+  _LIBCPP_INLINE_VISIBILITY
+  path(string_type&& __s, format = format::auto_format) noexcept
+      : __pn_(_VSTD::move(__s)) {}
+
+  template <class _Source, class = _EnableIfPathable<_Source, void> >
+  path(const _Source& __src, format = format::auto_format) {
+    _SourceCVT<_Source>::__append_source(__pn_, __src);
+  }
+
+  template <class _InputIt>
+  path(_InputIt __first, _InputIt __last, format = format::auto_format) {
+    typedef typename iterator_traits<_InputIt>::value_type _ItVal;
+    _PathCVT<_ItVal>::__append_range(__pn_, __first, __last);
+  }
+
+  // TODO Implement locale conversions.
+  template <class _Source, class = _EnableIfPathable<_Source, void> >
+  path(const _Source& __src, const locale& __loc, format = format::auto_format);
+  template <class _InputIt>
+  path(_InputIt __first, _InputIt _last, const locale& __loc,
+       format = format::auto_format);
+
+  _LIBCPP_INLINE_VISIBILITY
+  ~path() = default;
+
+  // assignments
+  _LIBCPP_INLINE_VISIBILITY
+  path& operator=(const path& __p) {
+    __pn_ = __p.__pn_;
+    return *this;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  path& operator=(path&& __p) noexcept {
+    __pn_ = _VSTD::move(__p.__pn_);
+    return *this;
+  }
+
+  template <class = void>
+  _LIBCPP_INLINE_VISIBILITY path& operator=(string_type&& __s) noexcept {
+    __pn_ = _VSTD::move(__s);
+    return *this;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  path& assign(string_type&& __s) noexcept {
+    __pn_ = _VSTD::move(__s);
+    return *this;
+  }
+
+  template <class _Source>
+  _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source>
+  operator=(const _Source& __src) {
+    return this->assign(__src);
+  }
+
+  template <class _Source>
+  _EnableIfPathable<_Source> assign(const _Source& __src) {
+    __pn_.clear();
+    _SourceCVT<_Source>::__append_source(__pn_, __src);
+    return *this;
+  }
+
+  template <class _InputIt>
+  path& assign(_InputIt __first, _InputIt __last) {
+    typedef typename iterator_traits<_InputIt>::value_type _ItVal;
+    __pn_.clear();
+    _PathCVT<_ItVal>::__append_range(__pn_, __first, __last);
+    return *this;
+  }
+
+private:
+  template <class _ECharT>
+  static bool __source_is_absolute(_ECharT __first_or_null) {
+    return __is_separator(__first_or_null);
+  }
+
+public:
+  // appends
+  path& operator/=(const path& __p) {
+    if (__p.is_absolute()) {
+      __pn_ = __p.__pn_;
+      return *this;
+    }
+    if (has_filename())
+      __pn_ += preferred_separator;
+    __pn_ += __p.native();
+    return *this;
+  }
+
+  // FIXME: Use _LIBCPP_DIAGNOSE_WARNING to produce a diagnostic when __src
+  // is known at compile time to be "/" since the user almost certainly intended
+  // to append a separator instead of overwriting the path with "/"
+  template <class _Source>
+  _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source>
+  operator/=(const _Source& __src) {
+    return this->append(__src);
+  }
+
+  template <class _Source>
+  _EnableIfPathable<_Source> append(const _Source& __src) {
+    using _Traits = __is_pathable<_Source>;
+    using _CVT = _PathCVT<_SourceChar<_Source> >;
+    if (__source_is_absolute(_Traits::__first_or_null(__src)))
+      __pn_.clear();
+    else if (has_filename())
+      __pn_ += preferred_separator;
+    _CVT::__append_source(__pn_, __src);
+    return *this;
+  }
+
+  template <class _InputIt>
+  path& append(_InputIt __first, _InputIt __last) {
+    typedef typename iterator_traits<_InputIt>::value_type _ItVal;
+    static_assert(__can_convert_char<_ItVal>::value, "Must convertible");
+    using _CVT = _PathCVT<_ItVal>;
+    if (__first != __last && __source_is_absolute(*__first))
+      __pn_.clear();
+    else if (has_filename())
+      __pn_ += preferred_separator;
+    _CVT::__append_range(__pn_, __first, __last);
+    return *this;
+  }
+
+  // concatenation
+  _LIBCPP_INLINE_VISIBILITY
+  path& operator+=(const path& __x) {
+    __pn_ += __x.__pn_;
+    return *this;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  path& operator+=(const string_type& __x) {
+    __pn_ += __x;
+    return *this;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  path& operator+=(__string_view __x) {
+    __pn_ += __x;
+    return *this;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  path& operator+=(const value_type* __x) {
+    __pn_ += __x;
+    return *this;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY
+  path& operator+=(value_type __x) {
+    __pn_ += __x;
+    return *this;
+  }
+
+  template <class _ECharT>
+  typename enable_if<__can_convert_char<_ECharT>::value, path&>::type
+  operator+=(_ECharT __x) {
+    basic_string<_ECharT> __tmp;
+    __tmp += __x;
+    _PathCVT<_ECharT>::__append_source(__pn_, __tmp);
+    return *this;
+  }
+
+  template <class _Source>
+  _EnableIfPathable<_Source> operator+=(const _Source& __x) {
+    return this->concat(__x);
+  }
+
+  template <class _Source>
+  _EnableIfPathable<_Source> concat(const _Source& __x) {
+    _SourceCVT<_Source>::__append_source(__pn_, __x);
+    return *this;
+  }
+
+  template <class _InputIt>
+  path& concat(_InputIt __first, _InputIt __last) {
+    typedef typename iterator_traits<_InputIt>::value_type _ItVal;
+    _PathCVT<_ItVal>::__append_range(__pn_, __first, __last);
+    return *this;
+  }
+
+  // modifiers
+  _LIBCPP_INLINE_VISIBILITY
+  void clear() noexcept { __pn_.clear(); }
+
+  path& make_preferred() { return *this; }
+
+  _LIBCPP_INLINE_VISIBILITY
+  path& remove_filename() {
+    auto __fname = __filename();
+    if (!__fname.empty())
+      __pn_.erase(__fname.data() - __pn_.data());
+    return *this;
+  }
+
+  path& replace_filename(const path& __replacement) {
+    remove_filename();
+    return (*this /= __replacement);
+  }
+
+  path& replace_extension(const path& __replacement = path());
+
+  _LIBCPP_INLINE_VISIBILITY
+  void swap(path& __rhs) noexcept { __pn_.swap(__rhs.__pn_); }
+
+  // private helper to allow reserving memory in the path
+  _LIBCPP_INLINE_VISIBILITY
+  void __reserve(size_t __s) { __pn_.reserve(__s); }
+
+  // native format observers
+  _LIBCPP_INLINE_VISIBILITY
+  const string_type& native() const noexcept { return __pn_; }
+
+  _LIBCPP_INLINE_VISIBILITY
+  const value_type* c_str() const noexcept { return __pn_.c_str(); }
+
+  _LIBCPP_INLINE_VISIBILITY operator string_type() const { return __pn_; }
+
+  template <class _ECharT, class _Traits = char_traits<_ECharT>,
+            class _Allocator = allocator<_ECharT> >
+  basic_string<_ECharT, _Traits, _Allocator>
+  string(const _Allocator& __a = _Allocator()) const {
+    using _CVT = __widen_from_utf8<sizeof(_ECharT) * __CHAR_BIT__>;
+    using _Str = basic_string<_ECharT, _Traits, _Allocator>;
+    _Str __s(__a);
+    __s.reserve(__pn_.size());
+    _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size());
+    return __s;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY std::string string() const { return __pn_; }
+  _LIBCPP_INLINE_VISIBILITY std::wstring wstring() const {
+    return string<wchar_t>();
+  }
+  _LIBCPP_INLINE_VISIBILITY std::string u8string() const { return __pn_; }
+  _LIBCPP_INLINE_VISIBILITY std::u16string u16string() const {
+    return string<char16_t>();
+  }
+  _LIBCPP_INLINE_VISIBILITY std::u32string u32string() const {
+    return string<char32_t>();
+  }
+
+  // generic format observers
+  template <class _ECharT, class _Traits = char_traits<_ECharT>,
+            class _Allocator = allocator<_ECharT> >
+  basic_string<_ECharT, _Traits, _Allocator>
+  generic_string(const _Allocator& __a = _Allocator()) const {
+    return string<_ECharT, _Traits, _Allocator>(__a);
+  }
+
+  std::string generic_string() const { return __pn_; }
+  std::wstring generic_wstring() const { return string<wchar_t>(); }
+  std::string generic_u8string() const { return __pn_; }
+  std::u16string generic_u16string() const { return string<char16_t>(); }
+  std::u32string generic_u32string() const { return string<char32_t>(); }
+
+private:
+  int __compare(__string_view) const;
+  __string_view __root_name() const;
+  __string_view __root_directory() const;
+  __string_view __root_path_raw() const;
+  __string_view __relative_path() const;
+  __string_view __parent_path() const;
+  __string_view __filename() const;
+  __string_view __stem() const;
+  __string_view __extension() const;
+
+public:
+  // compare
+  _LIBCPP_INLINE_VISIBILITY int compare(const path& __p) const noexcept {
+    return __compare(__p.__pn_);
+  }
+  _LIBCPP_INLINE_VISIBILITY int compare(const string_type& __s) const {
+    return __compare(__s);
+  }
+  _LIBCPP_INLINE_VISIBILITY int compare(__string_view __s) const {
+    return __compare(__s);
+  }
+  _LIBCPP_INLINE_VISIBILITY int compare(const value_type* __s) const {
+    return __compare(__s);
+  }
+
+  // decomposition
+  _LIBCPP_INLINE_VISIBILITY path root_name() const {
+    return string_type(__root_name());
+  }
+  _LIBCPP_INLINE_VISIBILITY path root_directory() const {
+    return string_type(__root_directory());
+  }
+  _LIBCPP_INLINE_VISIBILITY path root_path() const {
+    return root_name().append(string_type(__root_directory()));
+  }
+  _LIBCPP_INLINE_VISIBILITY path relative_path() const {
+    return string_type(__relative_path());
+  }
+  _LIBCPP_INLINE_VISIBILITY path parent_path() const {
+    return string_type(__parent_path());
+  }
+  _LIBCPP_INLINE_VISIBILITY path filename() const {
+    return string_type(__filename());
+  }
+  _LIBCPP_INLINE_VISIBILITY path stem() const { return string_type(__stem()); }
+  _LIBCPP_INLINE_VISIBILITY path extension() const {
+    return string_type(__extension());
+  }
+
+  // query
+  _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY bool
+  empty() const noexcept {
+    return __pn_.empty();
+  }
+
+  _LIBCPP_INLINE_VISIBILITY bool has_root_name() const {
+    return !__root_name().empty();
+  }
+  _LIBCPP_INLINE_VISIBILITY bool has_root_directory() const {
+    return !__root_directory().empty();
+  }
+  _LIBCPP_INLINE_VISIBILITY bool has_root_path() const {
+    return !__root_path_raw().empty();
+  }
+  _LIBCPP_INLINE_VISIBILITY bool has_relative_path() const {
+    return !__relative_path().empty();
+  }
+  _LIBCPP_INLINE_VISIBILITY bool has_parent_path() const {
+    return !__parent_path().empty();
+  }
+  _LIBCPP_INLINE_VISIBILITY bool has_filename() const {
+    return !__filename().empty();
+  }
+  _LIBCPP_INLINE_VISIBILITY bool has_stem() const { return !__stem().empty(); }
+  _LIBCPP_INLINE_VISIBILITY bool has_extension() const {
+    return !__extension().empty();
+  }
+
+  _LIBCPP_INLINE_VISIBILITY bool is_absolute() const {
+    return has_root_directory();
+  }
+  _LIBCPP_INLINE_VISIBILITY bool is_relative() const { return !is_absolute(); }
+
+  // relative paths
+  path lexically_normal() const;
+  path lexically_relative(const path& __base) const;
+
+  _LIBCPP_INLINE_VISIBILITY path lexically_proximate(const path& __base) const {
+    path __result = this->lexically_relative(__base);
+    if (__result.native().empty())
+      return *this;
+    return __result;
+  }
+
+  // iterators
+  class _LIBCPP_TYPE_VIS iterator;
+  typedef iterator const_iterator;
+
+  iterator begin() const;
+  iterator end() const;
+
+  template <class _CharT, class _Traits>
+  _LIBCPP_INLINE_VISIBILITY friend
+      typename enable_if<is_same<_CharT, char>::value &&
+                             is_same<_Traits, char_traits<char> >::value,
+                         basic_ostream<_CharT, _Traits>&>::type
+      operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) {
+    __os << std::__quoted(__p.native());
+    return __os;
+  }
+
+  template <class _CharT, class _Traits>
+  _LIBCPP_INLINE_VISIBILITY friend
+      typename enable_if<!is_same<_CharT, char>::value ||
+                             !is_same<_Traits, char_traits<char> >::value,
+                         basic_ostream<_CharT, _Traits>&>::type
+      operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) {
+    __os << std::__quoted(__p.string<_CharT, _Traits>());
+    return __os;
+  }
+
+  template <class _CharT, class _Traits>
+  _LIBCPP_INLINE_VISIBILITY friend basic_istream<_CharT, _Traits>&
+  operator>>(basic_istream<_CharT, _Traits>& __is, path& __p) {
+    basic_string<_CharT, _Traits> __tmp;
+    __is >> __quoted(__tmp);
+    __p = __tmp;
+    return __is;
+  }
+
+private:
+  inline _LIBCPP_INLINE_VISIBILITY path&
+  __assign_view(__string_view const& __s) noexcept {
+    __pn_ = string_type(__s);
+    return *this;
+  }
+  string_type __pn_;
+};
+
+inline _LIBCPP_INLINE_VISIBILITY void swap(path& __lhs, path& __rhs) noexcept {
+  __lhs.swap(__rhs);
+}
+
+_LIBCPP_FUNC_VIS
+size_t hash_value(const path& __p) noexcept;
+
+inline _LIBCPP_INLINE_VISIBILITY bool operator==(const path& __lhs,
+                                                 const path& __rhs) noexcept {
+  return __lhs.compare(__rhs) == 0;
+}
+
+inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const path& __lhs,
+                                                 const path& __rhs) noexcept {
+  return __lhs.compare(__rhs) != 0;
+}
+
+inline _LIBCPP_INLINE_VISIBILITY bool operator<(const path& __lhs,
+                                                const path& __rhs) noexcept {
+  return __lhs.compare(__rhs) < 0;
+}
+
+inline _LIBCPP_INLINE_VISIBILITY bool operator<=(const path& __lhs,
+                                                 const path& __rhs) noexcept {
+  return __lhs.compare(__rhs) <= 0;
+}
+
+inline _LIBCPP_INLINE_VISIBILITY bool operator>(const path& __lhs,
+                                                const path& __rhs) noexcept {
+  return __lhs.compare(__rhs) > 0;
+}
+
+inline _LIBCPP_INLINE_VISIBILITY bool operator>=(const path& __lhs,
+                                                 const path& __rhs) noexcept {
+  return __lhs.compare(__rhs) >= 0;
+}
+
+inline _LIBCPP_INLINE_VISIBILITY path operator/(const path& __lhs,
+                                                const path& __rhs) {
+  path __result(__lhs);
+  __result /= __rhs;
+  return __result;
+}
+
+template <class _Source>
+_LIBCPP_INLINE_VISIBILITY
+    typename enable_if<__is_pathable<_Source>::value, path>::type
+    u8path(const _Source& __s) {
+  static_assert(
+      is_same<typename __is_pathable<_Source>::__char_type, char>::value,
+      "u8path(Source const&) requires Source have a character type of type "
+      "'char'");
+  return path(__s);
+}
+
+template <class _InputIt>
+_LIBCPP_INLINE_VISIBILITY
+    typename enable_if<__is_pathable<_InputIt>::value, path>::type
+    u8path(_InputIt __f, _InputIt __l) {
+  static_assert(
+      is_same<typename __is_pathable<_InputIt>::__char_type, char>::value,
+      "u8path(Iter, Iter) requires Iter have a value_type of type 'char'");
+  return path(__f, __l);
+}
+
+class _LIBCPP_TYPE_VIS path::iterator {
+public:
+  enum _ParserState : unsigned char {
+    _Singular,
+    _BeforeBegin,
+    _InRootName,
+    _InRootDir,
+    _InFilenames,
+    _InTrailingSep,
+    _AtEnd
+  };
+
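A note on the append semantics defined above: `operator/=` inserts `preferred_separator` only when the left side already has a filename, and an absolute right-hand operand replaces the path wholesale, which is exactly the `p /= "/"` trap the FIXME comment flags. A minimal sketch of the observable behaviour (illustrative only, not part of the diff; with the clang 8 toolchain vendored here you may still need to link `-lc++fs`):

```cpp
#include <cassert>
#include <filesystem>

namespace fs = std::filesystem;

int main() {
  fs::path p = "/usr/local";
  p /= "lib";                    // separator inserted: "/usr/local/lib"
  assert(p == "/usr/local/lib");

  p /= "/etc";                   // absolute rhs replaces the whole path
  assert(p == "/etc");

  fs::path f = "/tmp/archive.tar.gz";
  assert(f.filename()    == "archive.tar.gz");
  assert(f.stem()        == "archive.tar");  // only the last extension splits off
  assert(f.extension()   == ".gz");
  assert(f.parent_path() == "/tmp");
}
```

The free `operator/` above follows the same rules: it copies the left operand and delegates to `operator/=`.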
+public: + typedef bidirectional_iterator_tag iterator_category; + + typedef path value_type; + typedef std::ptrdiff_t difference_type; + typedef const path* pointer; + typedef const path& reference; + + typedef void + __stashing_iterator_tag; // See reverse_iterator and __is_stashing_iterator + +public: + _LIBCPP_INLINE_VISIBILITY + iterator() + : __stashed_elem_(), __path_ptr_(nullptr), __entry_(), + __state_(_Singular) {} + + iterator(const iterator&) = default; + ~iterator() = default; + + iterator& operator=(const iterator&) = default; + + _LIBCPP_INLINE_VISIBILITY + reference operator*() const { return __stashed_elem_; } + + _LIBCPP_INLINE_VISIBILITY + pointer operator->() const { return &__stashed_elem_; } + + _LIBCPP_INLINE_VISIBILITY + iterator& operator++() { + _LIBCPP_ASSERT(__state_ != _Singular, + "attempting to increment a singular iterator"); + _LIBCPP_ASSERT(__state_ != _AtEnd, + "attempting to increment the end iterator"); + return __increment(); + } + + _LIBCPP_INLINE_VISIBILITY + iterator operator++(int) { + iterator __it(*this); + this->operator++(); + return __it; + } + + _LIBCPP_INLINE_VISIBILITY + iterator& operator--() { + _LIBCPP_ASSERT(__state_ != _Singular, + "attempting to decrement a singular iterator"); + _LIBCPP_ASSERT(__entry_.data() != __path_ptr_->native().data(), + "attempting to decrement the begin iterator"); + return __decrement(); + } + + _LIBCPP_INLINE_VISIBILITY + iterator operator--(int) { + iterator __it(*this); + this->operator--(); + return __it; + } + +private: + friend class path; + + inline _LIBCPP_INLINE_VISIBILITY friend bool operator==(const iterator&, + const iterator&); + + iterator& __increment(); + iterator& __decrement(); + + path __stashed_elem_; + const path* __path_ptr_; + path::__string_view __entry_; + _ParserState __state_; +}; + +inline _LIBCPP_INLINE_VISIBILITY bool operator==(const path::iterator& __lhs, + const path::iterator& __rhs) { + return __lhs.__path_ptr_ == __rhs.__path_ptr_ && + __lhs.__entry_.data() == __rhs.__entry_.data(); +} + +inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const path::iterator& __lhs, + const path::iterator& __rhs) { + return !(__lhs == __rhs); +} + +class _LIBCPP_EXCEPTION_ABI filesystem_error : public system_error { +public: + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(path(), path())) { + __create_what(0); + } + + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, const path& __p1, error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(__p1, path())) { + __create_what(1); + } + + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, const path& __p1, const path& __p2, + error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(__p1, __p2)) { + __create_what(2); + } + + _LIBCPP_INLINE_VISIBILITY + const path& path1() const noexcept { return __storage_->__p1_; } + + _LIBCPP_INLINE_VISIBILITY + const path& path2() const noexcept { return __storage_->__p2_; } + + ~filesystem_error() override; // key function + + _LIBCPP_INLINE_VISIBILITY + const char* what() const noexcept override { + return __storage_->__what_.c_str(); + } + + _LIBCPP_FUNC_VIS + void __create_what(int __num_paths); + +private: + struct _Storage { + _LIBCPP_INLINE_VISIBILITY + _Storage(const path& __p1, const path& __p2) : __p1_(__p1), __p2_(__p2) {} + + path __p1_; + path __p2_; + string __what_; + }; + shared_ptr<_Storage> __storage_; 
+}; + +template +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY +#ifndef _LIBCPP_NO_EXCEPTIONS + void + __throw_filesystem_error(_Args&&... __args) { + throw filesystem_error(std::forward<_Args>(__args)...); +} +#else + void + __throw_filesystem_error(_Args&&...) { + _VSTD::abort(); +} +#endif + +// operational functions + +_LIBCPP_FUNC_VIS +path __absolute(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +path __canonical(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __copy(const path& __from, const path& __to, copy_options __opt, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +bool __copy_file(const path& __from, const path& __to, copy_options __opt, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __copy_symlink(const path& __existing_symlink, const path& __new_symlink, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +bool __create_directories(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +bool __create_directory(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +bool __create_directory(const path& p, const path& attributes, + error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __create_directory_symlink(const path& __to, const path& __new_symlink, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __create_hard_link(const path& __to, const path& __new_hard_link, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __create_symlink(const path& __to, const path& __new_symlink, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +path __current_path(error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __current_path(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +bool __equivalent(const path&, const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +uintmax_t __file_size(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +uintmax_t __hard_link_count(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +bool __fs_is_empty(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +file_time_type __last_write_time(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __last_write_time(const path& p, file_time_type new_time, + error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __permissions(const path&, perms, perm_options, error_code* = nullptr); +_LIBCPP_FUNC_VIS +path __read_symlink(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +bool __remove(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +uintmax_t __remove_all(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __rename(const path& from, const path& to, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __resize_file(const path& p, uintmax_t size, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +space_info __space(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +file_status __status(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +file_status __symlink_status(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +path __system_complete(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +path __temp_directory_path(error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +path __weakly_canonical(path const& __p, error_code* __ec = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY path current_path() { + return __current_path(); +} + +inline _LIBCPP_INLINE_VISIBILITY path current_path(error_code& __ec) { + return __current_path(&__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p) { + __current_path(__p); +} + +inline 
_LIBCPP_INLINE_VISIBILITY void current_path(const path& __p, + error_code& __ec) noexcept { + __current_path(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& __p) { + return __absolute(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& __p, + error_code& __ec) { + return __absolute(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p) { + return __canonical(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p, + error_code& __ec) { + return __canonical(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, + const path& __to) { + __copy(__from, __to, copy_options::none); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + error_code& __ec) { + __copy(__from, __to, copy_options::none, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + copy_options __opt) { + __copy(__from, __to, __opt); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + copy_options __opt, + error_code& __ec) { + __copy(__from, __to, __opt, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, + const path& __to) { + return __copy_file(__from, __to, copy_options::none); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +copy_file(const path& __from, const path& __to, error_code& __ec) { + return __copy_file(__from, __to, copy_options::none, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +copy_file(const path& __from, const path& __to, copy_options __opt) { + return __copy_file(__from, __to, __opt); +} + +inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, + const path& __to, + copy_options __opt, + error_code& __ec) { + return __copy_file(__from, __to, __opt, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy_symlink(const path& __existing, + const path& __new) { + __copy_symlink(__existing, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +copy_symlink(const path& __ext, const path& __new, error_code& __ec) noexcept { + __copy_symlink(__ext, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p) { + return __create_directories(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p, + error_code& __ec) { + return __create_directories(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p) { + return __create_directory(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +create_directory(const path& __p, error_code& __ec) noexcept { + return __create_directory(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p, + const path& __attrs) { + return __create_directory(__p, __attrs); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +create_directory(const path& __p, const path& __attrs, + error_code& __ec) noexcept { + return __create_directory(__p, __attrs, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_directory_symlink(const path& __to, const path& __new) { + __create_directory_symlink(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_directory_symlink(const path& __to, const path& __new, + error_code& __ec) noexcept { + __create_directory_symlink(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void create_hard_link(const path& __to, + const path& __new) { + __create_hard_link(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_hard_link(const 
path& __to, const path& __new, + error_code& __ec) noexcept { + __create_hard_link(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void create_symlink(const path& __to, + const path& __new) { + __create_symlink(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_symlink(const path& __to, const path& __new, error_code& __ec) noexcept { + return __create_symlink(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool status_known(file_status __s) noexcept { + return __s.type() != file_type::none; +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(file_status __s) noexcept { + return status_known(__s) && __s.type() != file_type::not_found; +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p) { + return exists(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p, + error_code& __ec) noexcept { + auto __s = __status(__p, &__ec); + if (status_known(__s)) + __ec.clear(); + return exists(__s); +} + +inline _LIBCPP_INLINE_VISIBILITY bool equivalent(const path& __p1, + const path& __p2) { + return __equivalent(__p1, __p2); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept { + return __equivalent(__p1, __p2, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t file_size(const path& __p) { + return __file_size(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t +file_size(const path& __p, error_code& __ec) noexcept { + return __file_size(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t hard_link_count(const path& __p) { + return __hard_link_count(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t +hard_link_count(const path& __p, error_code& __ec) noexcept { + return __hard_link_count(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(file_status __s) noexcept { + return __s.type() == file_type::block; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p) { + return is_block_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p, + error_code& __ec) noexcept { + return is_block_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_character_file(file_status __s) noexcept { + return __s.type() == file_type::character; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_character_file(const path& __p) { + return is_character_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_character_file(const path& __p, error_code& __ec) noexcept { + return is_character_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(file_status __s) noexcept { + return __s.type() == file_type::directory; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p) { + return is_directory(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p, + error_code& __ec) noexcept { + return is_directory(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p) { + return __fs_is_empty(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p, + error_code& __ec) { + return __fs_is_empty(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(file_status __s) noexcept { + return __s.type() == file_type::fifo; +} +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p) { + return is_fifo(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p, + error_code& __ec) noexcept { + return 
is_fifo(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_regular_file(file_status __s) noexcept { + return __s.type() == file_type::regular; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_regular_file(const path& __p) { + return is_regular_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_regular_file(const path& __p, error_code& __ec) noexcept { + return is_regular_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(file_status __s) noexcept { + return __s.type() == file_type::socket; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p) { + return is_socket(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p, + error_code& __ec) noexcept { + return is_socket(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(file_status __s) noexcept { + return __s.type() == file_type::symlink; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p) { + return is_symlink(__symlink_status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p, + error_code& __ec) noexcept { + return is_symlink(__symlink_status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(file_status __s) noexcept { + return exists(__s) && !is_regular_file(__s) && !is_directory(__s) && + !is_symlink(__s); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p) { + return is_other(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p, + error_code& __ec) noexcept { + return is_other(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY file_time_type +last_write_time(const path& __p) { + return __last_write_time(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_time_type +last_write_time(const path& __p, error_code& __ec) noexcept { + return __last_write_time(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void last_write_time(const path& __p, + file_time_type __t) { + __last_write_time(__p, __t); +} + +inline _LIBCPP_INLINE_VISIBILITY void +last_write_time(const path& __p, file_time_type __t, + error_code& __ec) noexcept { + __last_write_time(__p, __t, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void +permissions(const path& __p, perms __prms, + perm_options __opts = perm_options::replace) { + __permissions(__p, __prms, __opts); +} + +inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, + error_code& __ec) noexcept { + __permissions(__p, __prms, perm_options::replace, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, + perm_options __opts, + error_code& __ec) { + __permissions(__p, __prms, __opts, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, + const path& __base, + error_code& __ec) { + path __tmp = __weakly_canonical(__p, &__ec); + if (__ec) + return {}; + path __tmp_base = __weakly_canonical(__base, &__ec); + if (__ec) + return {}; + return __tmp.lexically_proximate(__tmp_base); +} + +inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, + error_code& __ec) { + return proximate(__p, current_path(), __ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path +proximate(const path& __p, const path& __base = current_path()) { + return __weakly_canonical(__p).lexically_proximate( + __weakly_canonical(__base)); +} + +inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p) { + return __read_symlink(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path 
read_symlink(const path& __p, + error_code& __ec) { + return __read_symlink(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, + const path& __base, + error_code& __ec) { + path __tmp = __weakly_canonical(__p, &__ec); + if (__ec) + return path(); + path __tmpbase = __weakly_canonical(__base, &__ec); + if (__ec) + return path(); + return __tmp.lexically_relative(__tmpbase); +} + +inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, + error_code& __ec) { + return relative(__p, current_path(), __ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path +relative(const path& __p, const path& __base = current_path()) { + return __weakly_canonical(__p).lexically_relative(__weakly_canonical(__base)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p) { + return __remove(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p, + error_code& __ec) noexcept { + return __remove(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p) { + return __remove_all(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p, + error_code& __ec) { + return __remove_all(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void rename(const path& __from, + const path& __to) { + return __rename(__from, __to); +} + +inline _LIBCPP_INLINE_VISIBILITY void +rename(const path& __from, const path& __to, error_code& __ec) noexcept { + return __rename(__from, __to, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void resize_file(const path& __p, + uintmax_t __ns) { + return __resize_file(__p, __ns); +} + +inline _LIBCPP_INLINE_VISIBILITY void +resize_file(const path& __p, uintmax_t __ns, error_code& __ec) noexcept { + return __resize_file(__p, __ns, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p) { + return __space(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p, + error_code& __ec) noexcept { + return __space(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p) { + return __status(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p, + error_code& __ec) noexcept { + return __status(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status symlink_status(const path& __p) { + return __symlink_status(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status +symlink_status(const path& __p, error_code& __ec) noexcept { + return __symlink_status(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path() { + return __temp_directory_path(); +} + +inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path(error_code& __ec) { + return __temp_directory_path(&__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p) { + return __weakly_canonical(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p, + error_code& __ec) { + return __weakly_canonical(__p, &__ec); +} + +class directory_iterator; +class recursive_directory_iterator; +class __dir_stream; + +class directory_entry { + typedef _VSTD_FS::path _Path; + +public: + // constructors and destructors + directory_entry() noexcept = default; + directory_entry(directory_entry const&) = default; + directory_entry(directory_entry&&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY + explicit directory_entry(_Path const& __p) : __p_(__p) { + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + directory_entry(_Path const& __p, error_code& __ec) 
: __p_(__p) { + __refresh(&__ec); + } + + ~directory_entry() {} + + directory_entry& operator=(directory_entry const&) = default; + directory_entry& operator=(directory_entry&&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY + void assign(_Path const& __p) { + __p_ = __p; + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void assign(_Path const& __p, error_code& __ec) { + __p_ = __p; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void replace_filename(_Path const& __p) { + __p_.replace_filename(__p); + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void replace_filename(_Path const& __p, error_code& __ec) { + __p_ = __p_.parent_path() / __p; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void refresh() { __refresh(); } + + _LIBCPP_INLINE_VISIBILITY + void refresh(error_code& __ec) noexcept { __refresh(&__ec); } + + _LIBCPP_INLINE_VISIBILITY + _Path const& path() const noexcept { return __p_; } + + _LIBCPP_INLINE_VISIBILITY + operator const _Path&() const noexcept { return __p_; } + + _LIBCPP_INLINE_VISIBILITY + bool exists() const { return _VSTD_FS::exists(file_status{__get_ft()}); } + + _LIBCPP_INLINE_VISIBILITY + bool exists(error_code& __ec) const noexcept { + return _VSTD_FS::exists(file_status{__get_ft(&__ec)}); + } + + _LIBCPP_INLINE_VISIBILITY + bool is_block_file() const { return __get_ft() == file_type::block; } + + _LIBCPP_INLINE_VISIBILITY + bool is_block_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::block; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_character_file() const { return __get_ft() == file_type::character; } + + _LIBCPP_INLINE_VISIBILITY + bool is_character_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::character; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_directory() const { return __get_ft() == file_type::directory; } + + _LIBCPP_INLINE_VISIBILITY + bool is_directory(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::directory; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_fifo() const { return __get_ft() == file_type::fifo; } + + _LIBCPP_INLINE_VISIBILITY + bool is_fifo(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::fifo; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_other() const { return _VSTD_FS::is_other(file_status{__get_ft()}); } + + _LIBCPP_INLINE_VISIBILITY + bool is_other(error_code& __ec) const noexcept { + return _VSTD_FS::is_other(file_status{__get_ft(&__ec)}); + } + + _LIBCPP_INLINE_VISIBILITY + bool is_regular_file() const { return __get_ft() == file_type::regular; } + + _LIBCPP_INLINE_VISIBILITY + bool is_regular_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::regular; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_socket() const { return __get_ft() == file_type::socket; } + + _LIBCPP_INLINE_VISIBILITY + bool is_socket(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::socket; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_symlink() const { return __get_sym_ft() == file_type::symlink; } + + _LIBCPP_INLINE_VISIBILITY + bool is_symlink(error_code& __ec) const noexcept { + return __get_sym_ft(&__ec) == file_type::symlink; + } + _LIBCPP_INLINE_VISIBILITY + uintmax_t file_size() const { return __get_size(); } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t file_size(error_code& __ec) const noexcept { + return __get_size(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t hard_link_count() const { return __get_nlink(); } + + 
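As with the free operational functions, each `directory_entry` observer above comes in a throwing form and an `error_code` form: the no-argument overload reports failure by throwing `filesystem_error`, while the `error_code&` overload records it and returns. A short usage sketch (illustrative; `/tmp` is just a placeholder path):

```cpp
#include <filesystem>
#include <iostream>
#include <system_error>

namespace fs = std::filesystem;

int main() {
  fs::directory_entry ent{fs::path{"/tmp"}};  // ctor calls __refresh, as shown above

  // Throwing flavour: failures surface as fs::filesystem_error.
  try {
    std::cout << ent.path() << " is_directory: " << ent.is_directory() << '\n';
  } catch (const fs::filesystem_error& e) {
    std::cerr << e.what() << " (" << e.path1() << ")\n";
  }

  // error_code flavour: same query, no exceptions thrown.
  std::error_code ec;
  auto sz = ent.file_size(ec);
  if (ec)
    std::cerr << "file_size failed: " << ec.message() << '\n';
  else
    std::cout << "size: " << sz << '\n';
}
```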
_LIBCPP_INLINE_VISIBILITY + uintmax_t hard_link_count(error_code& __ec) const noexcept { + return __get_nlink(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_time_type last_write_time() const { return __get_write_time(); } + + _LIBCPP_INLINE_VISIBILITY + file_time_type last_write_time(error_code& __ec) const noexcept { + return __get_write_time(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_status status() const { return __get_status(); } + + _LIBCPP_INLINE_VISIBILITY + file_status status(error_code& __ec) const noexcept { + return __get_status(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_status symlink_status() const { return __get_symlink_status(); } + + _LIBCPP_INLINE_VISIBILITY + file_status symlink_status(error_code& __ec) const noexcept { + return __get_symlink_status(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + bool operator<(directory_entry const& __rhs) const noexcept { + return __p_ < __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator==(directory_entry const& __rhs) const noexcept { + return __p_ == __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator!=(directory_entry const& __rhs) const noexcept { + return __p_ != __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator<=(directory_entry const& __rhs) const noexcept { + return __p_ <= __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator>(directory_entry const& __rhs) const noexcept { + return __p_ > __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator>=(directory_entry const& __rhs) const noexcept { + return __p_ >= __rhs.__p_; + } + +private: + friend class directory_iterator; + friend class recursive_directory_iterator; + friend class __dir_stream; + + enum _CacheType : unsigned char { + _Empty, + _IterSymlink, + _IterNonSymlink, + _RefreshSymlink, + _RefreshSymlinkUnresolved, + _RefreshNonSymlink + }; + + struct __cached_data { + uintmax_t __size_; + uintmax_t __nlink_; + file_time_type __write_time_; + perms __sym_perms_; + perms __non_sym_perms_; + file_type __type_; + _CacheType __cache_type_; + + _LIBCPP_INLINE_VISIBILITY + __cached_data() noexcept { __reset(); } + + _LIBCPP_INLINE_VISIBILITY + void __reset() { + __cache_type_ = _Empty; + __type_ = file_type::none; + __sym_perms_ = __non_sym_perms_ = perms::unknown; + __size_ = __nlink_ = uintmax_t(-1); + __write_time_ = file_time_type::min(); + } + }; + + _LIBCPP_INLINE_VISIBILITY + static __cached_data __create_iter_result(file_type __ft) { + __cached_data __data; + __data.__type_ = __ft; + __data.__cache_type_ = [&]() { + switch (__ft) { + case file_type::none: + return _Empty; + case file_type::symlink: + return _IterSymlink; + default: + return _IterNonSymlink; + } + }(); + return __data; + } + + _LIBCPP_INLINE_VISIBILITY + void __assign_iter_entry(_Path&& __p, __cached_data __dt) { + __p_ = std::move(__p); + __data_ = __dt; + } + + _LIBCPP_FUNC_VIS + error_code __do_refresh() noexcept; + + _LIBCPP_INLINE_VISIBILITY + static bool __is_dne_error(error_code const& __ec) { + if (!__ec) + return true; + switch (static_cast(__ec.value())) { + case errc::no_such_file_or_directory: + case errc::not_a_directory: + return true; + default: + return false; + } + } + + _LIBCPP_INLINE_VISIBILITY + void __handle_error(const char* __msg, error_code* __dest_ec, + error_code const& __ec, bool __allow_dne = false) const { + if (__dest_ec) { + *__dest_ec = __ec; + return; + } + if (__ec && (!__allow_dne || !__is_dne_error(__ec))) + __throw_filesystem_error(__msg, __p_, __ec); + } + + _LIBCPP_INLINE_VISIBILITY + void 
__refresh(error_code* __ec = nullptr) { + __handle_error("in directory_entry::refresh", __ec, __do_refresh(), + /*allow_dne*/ true); + } + + _LIBCPP_INLINE_VISIBILITY + file_type __get_sym_ft(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + return __symlink_status(__p_, __ec).type(); + case _IterSymlink: + case _RefreshSymlink: + case _RefreshSymlinkUnresolved: + if (__ec) + __ec->clear(); + return file_type::symlink; + case _IterNonSymlink: + case _RefreshNonSymlink: + file_status __st(__data_.__type_); + if (__ec && !_VSTD_FS::exists(__st)) + *__ec = make_error_code(errc::no_such_file_or_directory); + else if (__ec) + __ec->clear(); + return __data_.__type_; + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_type __get_ft(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return __status(__p_, __ec).type(); + case _IterNonSymlink: + case _RefreshNonSymlink: + case _RefreshSymlink: { + file_status __st(__data_.__type_); + if (__ec && !_VSTD_FS::exists(__st)) + *__ec = make_error_code(errc::no_such_file_or_directory); + else if (__ec) + __ec->clear(); + return __data_.__type_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_status __get_status(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return __status(__p_, __ec); + case _RefreshNonSymlink: + case _RefreshSymlink: + return file_status(__get_ft(__ec), __data_.__non_sym_perms_); + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_status __get_symlink_status(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + return __symlink_status(__p_, __ec); + case _RefreshNonSymlink: + return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_); + case _RefreshSymlink: + case _RefreshSymlinkUnresolved: + return file_status(__get_sym_ft(__ec), __data_.__sym_perms_); + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t __get_size(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__file_size(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + file_status __st(__get_ft(&__m_ec)); + __handle_error("in directory_entry::file_size", __ec, __m_ec); + if (_VSTD_FS::exists(__st) && !_VSTD_FS::is_regular_file(__st)) { + errc __err_kind = _VSTD_FS::is_directory(__st) ? 
errc::is_a_directory + : errc::not_supported; + __handle_error("in directory_entry::file_size", __ec, + make_error_code(__err_kind)); + } + return __data_.__size_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t __get_nlink(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__hard_link_count(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + (void)__get_ft(&__m_ec); + __handle_error("in directory_entry::hard_link_count", __ec, __m_ec); + return __data_.__nlink_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_time_type __get_write_time(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__last_write_time(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + file_status __st(__get_ft(&__m_ec)); + __handle_error("in directory_entry::last_write_time", __ec, __m_ec); + if (_VSTD_FS::exists(__st) && + __data_.__write_time_ == file_time_type::min()) + __handle_error("in directory_entry::last_write_time", __ec, + make_error_code(errc::value_too_large)); + return __data_.__write_time_; + } + } + _LIBCPP_UNREACHABLE(); + } + +private: + _Path __p_; + __cached_data __data_; +}; + +class __dir_element_proxy { +public: + inline _LIBCPP_INLINE_VISIBILITY directory_entry operator*() { + return _VSTD::move(__elem_); + } + +private: + friend class directory_iterator; + friend class recursive_directory_iterator; + explicit __dir_element_proxy(directory_entry const& __e) : __elem_(__e) {} + __dir_element_proxy(__dir_element_proxy&& __o) + : __elem_(_VSTD::move(__o.__elem_)) {} + directory_entry __elem_; +}; + +class directory_iterator { +public: + typedef directory_entry value_type; + typedef ptrdiff_t difference_type; + typedef value_type const* pointer; + typedef value_type const& reference; + typedef input_iterator_tag iterator_category; + +public: + //ctor & dtor + directory_iterator() noexcept {} + + explicit directory_iterator(const path& __p) + : directory_iterator(__p, nullptr) {} + + directory_iterator(const path& __p, directory_options __opts) + : directory_iterator(__p, nullptr, __opts) {} + + directory_iterator(const path& __p, error_code& __ec) + : directory_iterator(__p, &__ec) {} + + directory_iterator(const path& __p, directory_options __opts, + error_code& __ec) + : directory_iterator(__p, &__ec, __opts) {} + + directory_iterator(const directory_iterator&) = default; + directory_iterator(directory_iterator&&) = default; + directory_iterator& operator=(const directory_iterator&) = default; + + directory_iterator& operator=(directory_iterator&& __o) noexcept { + // non-default implementation provided to support self-move assign. 
+ if (this != &__o) { + __imp_ = _VSTD::move(__o.__imp_); + } + return *this; + } + + ~directory_iterator() = default; + + const directory_entry& operator*() const { + _LIBCPP_ASSERT(__imp_, "The end iterator cannot be dereferenced"); + return __dereference(); + } + + const directory_entry* operator->() const { return &**this; } + + directory_iterator& operator++() { return __increment(); } + + __dir_element_proxy operator++(int) { + __dir_element_proxy __p(**this); + __increment(); + return __p; + } + + directory_iterator& increment(error_code& __ec) { return __increment(&__ec); } + +private: + inline _LIBCPP_INLINE_VISIBILITY friend bool + operator==(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept; + + // construct the dir_stream + _LIBCPP_FUNC_VIS + directory_iterator(const path&, error_code*, + directory_options = directory_options::none); + + _LIBCPP_FUNC_VIS + directory_iterator& __increment(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + const directory_entry& __dereference() const; + +private: + shared_ptr<__dir_stream> __imp_; +}; + +inline _LIBCPP_INLINE_VISIBILITY bool +operator==(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept { + return __lhs.__imp_ == __rhs.__imp_; +} + +inline _LIBCPP_INLINE_VISIBILITY bool +operator!=(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept { + return !(__lhs == __rhs); +} + +// enable directory_iterator range-based for statements +inline _LIBCPP_INLINE_VISIBILITY directory_iterator +begin(directory_iterator __iter) noexcept { + return __iter; +} + +inline _LIBCPP_INLINE_VISIBILITY directory_iterator +end(const directory_iterator&) noexcept { + return directory_iterator(); +} + +class recursive_directory_iterator { +public: + using value_type = directory_entry; + using difference_type = std::ptrdiff_t; + using pointer = directory_entry const*; + using reference = directory_entry const&; + using iterator_category = std::input_iterator_tag; + +public: + // constructors and destructor + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator() noexcept : __rec_(false) {} + + _LIBCPP_INLINE_VISIBILITY + explicit recursive_directory_iterator( + const path& __p, directory_options __xoptions = directory_options::none) + : recursive_directory_iterator(__p, __xoptions, nullptr) {} + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator(const path& __p, directory_options __xoptions, + error_code& __ec) + : recursive_directory_iterator(__p, __xoptions, &__ec) {} + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator(const path& __p, error_code& __ec) + : recursive_directory_iterator(__p, directory_options::none, &__ec) {} + + recursive_directory_iterator(const recursive_directory_iterator&) = default; + recursive_directory_iterator(recursive_directory_iterator&&) = default; + + recursive_directory_iterator& + operator=(const recursive_directory_iterator&) = default; + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator& + operator=(recursive_directory_iterator&& __o) noexcept { + // non-default implementation provided to support self-move assign. 
+ if (this != &__o) { + __imp_ = _VSTD::move(__o.__imp_); + __rec_ = __o.__rec_; + } + return *this; + } + + ~recursive_directory_iterator() = default; + + _LIBCPP_INLINE_VISIBILITY + const directory_entry& operator*() const { return __dereference(); } + + _LIBCPP_INLINE_VISIBILITY + const directory_entry* operator->() const { return &__dereference(); } + + recursive_directory_iterator& operator++() { return __increment(); } + + _LIBCPP_INLINE_VISIBILITY + __dir_element_proxy operator++(int) { + __dir_element_proxy __p(**this); + __increment(); + return __p; + } + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator& increment(error_code& __ec) { + return __increment(&__ec); + } + + _LIBCPP_FUNC_VIS directory_options options() const; + _LIBCPP_FUNC_VIS int depth() const; + + _LIBCPP_INLINE_VISIBILITY + void pop() { __pop(); } + + _LIBCPP_INLINE_VISIBILITY + void pop(error_code& __ec) { __pop(&__ec); } + + _LIBCPP_INLINE_VISIBILITY + bool recursion_pending() const { return __rec_; } + + _LIBCPP_INLINE_VISIBILITY + void disable_recursion_pending() { __rec_ = false; } + +private: + recursive_directory_iterator(const path& __p, directory_options __opt, + error_code* __ec); + + _LIBCPP_FUNC_VIS + const directory_entry& __dereference() const; + + _LIBCPP_FUNC_VIS + bool __try_recursion(error_code* __ec); + + _LIBCPP_FUNC_VIS + void __advance(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + recursive_directory_iterator& __increment(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + void __pop(error_code* __ec = nullptr); + + inline _LIBCPP_INLINE_VISIBILITY friend bool + operator==(const recursive_directory_iterator&, + const recursive_directory_iterator&) noexcept; + + struct __shared_imp; + shared_ptr<__shared_imp> __imp_; + bool __rec_; +}; // class recursive_directory_iterator + +inline _LIBCPP_INLINE_VISIBILITY bool +operator==(const recursive_directory_iterator& __lhs, + const recursive_directory_iterator& __rhs) noexcept { + return __lhs.__imp_ == __rhs.__imp_; +} + +_LIBCPP_INLINE_VISIBILITY +inline bool operator!=(const recursive_directory_iterator& __lhs, + const recursive_directory_iterator& __rhs) noexcept { + return !(__lhs == __rhs); +} +// enable recursive_directory_iterator range-based for statements +inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator +begin(recursive_directory_iterator __iter) noexcept { + return __iter; +} + +inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator +end(const recursive_directory_iterator&) noexcept { + return recursive_directory_iterator(); +} + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // !_LIBCPP_CXX03_LANG + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP_FILESYSTEM diff --git a/include/c++/v1/float.h b/include/c++/v1/float.h index 1acfdc6..759ac8e 100644 --- a/include/c++/v1/float.h +++ b/include/c++/v1/float.h @@ -24,7 +24,14 @@ DBL_MANT_DIG LDBL_MANT_DIG + FLT_HAS_SUBNORM // C11 + DBL_HAS_SUBNORM // C11 + LDBL_HAS_SUBNORM // C11 + DECIMAL_DIG // C99 + FLT_DECIMAL_DIG // C11 + DBL_DECIMAL_DIG // C11 + LDBL_DECIMAL_DIG // C11 FLT_DIG DBL_DIG @@ -58,6 +65,10 @@ DBL_MIN LDBL_MIN + FLT_TRUE_MIN // C11 + DBL_TRUE_MIN // C11 + LDBL_TRUE_MIN // C11 + */ #include <__config> diff --git a/include/c++/v1/forward_list b/include/c++/v1/forward_list index 7b82041..f9a71f0 100644 --- a/include/c++/v1/forward_list +++ b/include/c++/v1/forward_list @@ -134,6 +134,11 @@ public: void reverse() noexcept; }; + +template ::value_type>> + forward_list(InputIterator, InputIterator, Allocator = Allocator()) + -> forward_list::value_type, 
Allocator>; // C++17 + template bool operator==(const forward_list& x, const forward_list& y); @@ -172,6 +177,7 @@ template #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -452,6 +458,10 @@ protected: typedef typename allocator_traits<__begin_node_allocator>::pointer __begin_node_pointer; + static_assert((!is_same::value), + "internal allocator type must differ from user-specified " + "type; otherwise overload resolution breaks"); + __compressed_pair<__begin_node, __node_allocator> __before_begin_; _LIBCPP_INLINE_VISIBILITY @@ -476,9 +486,11 @@ protected: _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value) : __before_begin_(__begin_node()) {} _LIBCPP_INLINE_VISIBILITY - __forward_list_base(const allocator_type& __a) + explicit __forward_list_base(const allocator_type& __a) : __before_begin_(__begin_node(), __node_allocator(__a)) {} - + _LIBCPP_INLINE_VISIBILITY + explicit __forward_list_base(const __node_allocator& __a) + : __before_begin_(__begin_node(), __a) {} #ifndef _LIBCPP_CXX03_LANG public: _LIBCPP_INLINE_VISIBILITY @@ -845,6 +857,23 @@ private: __sort(__node_pointer __f, difference_type __sz, _Compare& __comp); }; + +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template::value_type>, + class = typename enable_if<__is_allocator<_Alloc>::value, void>::type + > +forward_list(_InputIterator, _InputIterator) + -> forward_list::value_type, _Alloc>; + +template::value, void>::type + > +forward_list(_InputIterator, _InputIterator, _Alloc) + -> forward_list::value_type, _Alloc>; +#endif + template inline forward_list<_Tp, _Alloc>::forward_list(const allocator_type& __a) @@ -932,12 +961,9 @@ forward_list<_Tp, _Alloc>::forward_list(_InputIterator __f, _InputIterator __l, template forward_list<_Tp, _Alloc>::forward_list(const forward_list& __x) - : base(allocator_type( - __node_traits::select_on_container_copy_construction(__x.__alloc()) - ) - ) -{ - insert_after(cbefore_begin(), __x.begin(), __x.end()); + : base( + __node_traits::select_on_container_copy_construction(__x.__alloc())) { + insert_after(cbefore_begin(), __x.begin(), __x.end()); } template diff --git a/include/c++/v1/fstream b/include/c++/v1/fstream index 2ef4cf3..332b474 100644 --- a/include/c++/v1/fstream +++ b/include/c++/v1/fstream @@ -38,6 +38,7 @@ public: bool is_open() const; basic_filebuf* open(const char* s, ios_base::openmode mode); basic_filebuf* open(const string& s, ios_base::openmode mode); + basic_filebuf* open(const filesystem::path& p, ios_base::openmode mode); // C++17 basic_filebuf* close(); protected: @@ -77,6 +78,8 @@ public: basic_ifstream(); explicit basic_ifstream(const char* s, ios_base::openmode mode = ios_base::in); explicit basic_ifstream(const string& s, ios_base::openmode mode = ios_base::in); + explicit basic_ifstream(const filesystem::path& p, + ios_base::openmode mode = ios_base::in); // C++17 basic_ifstream(basic_ifstream&& rhs); basic_ifstream& operator=(basic_ifstream&& rhs); @@ -86,6 +89,8 @@ public: bool is_open() const; void open(const char* s, ios_base::openmode mode = ios_base::in); void open(const string& s, ios_base::openmode mode = ios_base::in); + void open(const filesystem::path& s, ios_base::openmode mode = ios_base::in); // C++17 + void close(); }; @@ -110,6 +115,8 @@ public: basic_ofstream(); explicit basic_ofstream(const char* s, ios_base::openmode mode = ios_base::out); explicit basic_ofstream(const string& s, ios_base::openmode mode = ios_base::out); + explicit basic_ofstream(const filesystem::path& 
p, + ios_base::openmode mode = ios_base::out); // C++17 basic_ofstream(basic_ofstream&& rhs); basic_ofstream& operator=(basic_ofstream&& rhs); @@ -119,6 +126,9 @@ public: bool is_open() const; void open(const char* s, ios_base::openmode mode = ios_base::out); void open(const string& s, ios_base::openmode mode = ios_base::out); + void open(const filesystem::path& p, + ios_base::openmode mode = ios_base::out); // C++17 + void close(); }; @@ -143,6 +153,8 @@ public: basic_fstream(); explicit basic_fstream(const char* s, ios_base::openmode mode = ios_base::in|ios_base::out); explicit basic_fstream(const string& s, ios_base::openmode mode = ios_base::in|ios_base::out); + explicit basic_fstream(const filesystem::path& p, + ios_base::openmode mode = ios_base::in|ios_base::out); C++17 basic_fstream(basic_fstream&& rhs); basic_fstream& operator=(basic_fstream&& rhs); @@ -152,6 +164,9 @@ public: bool is_open() const; void open(const char* s, ios_base::openmode mode = ios_base::in|ios_base::out); void open(const string& s, ios_base::openmode mode = ios_base::in|ios_base::out); + void open(const filesystem::path& s, + ios_base::openmode mode = ios_base::in|ios_base::out); // C++17 + void close(); }; @@ -170,6 +185,8 @@ typedef basic_fstream wfstream; #include #include <__locale> #include +#include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -217,10 +234,23 @@ public: #endif _LIBCPP_INLINE_VISIBILITY basic_filebuf* open(const string& __s, ios_base::openmode __mode); + +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_INLINE_VISIBILITY + basic_filebuf* open(const _VSTD_FS::path& __p, ios_base::openmode __mode) { + return open(__p.c_str(), __mode); + } +#endif + _LIBCPP_INLINE_VISIBILITY + basic_filebuf* __open(int __fd, ios_base::openmode __mode); #endif basic_filebuf* close(); -protected: + _LIBCPP_INLINE_VISIBILITY + inline static const char* + __make_mdstring(ios_base::openmode __mode) _NOEXCEPT; + + protected: // 27.9.1.5 Overridden virtual functions: virtual int_type underflow(); virtual int_type pbackfail(int_type __c = traits_type::eof()); @@ -234,25 +264,25 @@ protected: virtual void imbue(const locale& __loc); private: - char* __extbuf_; - const char* __extbufnext_; - const char* __extbufend_; - char __extbuf_min_[8]; - size_t __ebs_; - char_type* __intbuf_; - size_t __ibs_; - FILE* __file_; - const codecvt* __cv_; - state_type __st_; - state_type __st_last_; - ios_base::openmode __om_; - ios_base::openmode __cm_; - bool __owns_eb_; - bool __owns_ib_; - bool __always_noconv_; - - bool __read_mode(); - void __write_mode(); + char* __extbuf_; + const char* __extbufnext_; + const char* __extbufend_; + char __extbuf_min_[8]; + size_t __ebs_; + char_type* __intbuf_; + size_t __ibs_; + FILE* __file_; + const codecvt* __cv_; + state_type __st_; + state_type __st_last_; + ios_base::openmode __om_; + ios_base::openmode __cm_; + bool __owns_eb_; + bool __owns_ib_; + bool __always_noconv_; + + bool __read_mode(); + void __write_mode(); }; template @@ -473,6 +503,46 @@ basic_filebuf<_CharT, _Traits>::is_open() const return __file_ != 0; } +template +const char* basic_filebuf<_CharT, _Traits>::__make_mdstring( + ios_base::openmode __mode) _NOEXCEPT { + switch (__mode & ~ios_base::ate) { + case ios_base::out: + case ios_base::out | ios_base::trunc: + return "w"; + case ios_base::out | ios_base::app: + case ios_base::app: + return "a"; + case ios_base::in: + return "r"; + case ios_base::in | ios_base::out: + return "r+"; + case ios_base::in | ios_base::out | ios_base::trunc: + 
return "w+"; + case ios_base::in | ios_base::out | ios_base::app: + case ios_base::in | ios_base::app: + return "a+"; + case ios_base::out | ios_base::binary: + case ios_base::out | ios_base::trunc | ios_base::binary: + return "wb"; + case ios_base::out | ios_base::app | ios_base::binary: + case ios_base::app | ios_base::binary: + return "ab"; + case ios_base::in | ios_base::binary: + return "rb"; + case ios_base::in | ios_base::out | ios_base::binary: + return "r+b"; + case ios_base::in | ios_base::out | ios_base::trunc | ios_base::binary: + return "w+b"; + case ios_base::in | ios_base::out | ios_base::app | ios_base::binary: + case ios_base::in | ios_base::app | ios_base::binary: + return "a+b"; + default: + return nullptr; + } + _LIBCPP_UNREACHABLE(); +} + #ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE template basic_filebuf<_CharT, _Traits>* @@ -481,79 +551,49 @@ basic_filebuf<_CharT, _Traits>::open(const char* __s, ios_base::openmode __mode) basic_filebuf<_CharT, _Traits>* __rt = 0; if (__file_ == 0) { + if (const char* __mdstr = __make_mdstring(__mode)) { __rt = this; - const char* __mdstr; - switch (__mode & ~ios_base::ate) - { - case ios_base::out: - case ios_base::out | ios_base::trunc: - __mdstr = "w"; - break; - case ios_base::out | ios_base::app: - case ios_base::app: - __mdstr = "a"; - break; - case ios_base::in: - __mdstr = "r"; - break; - case ios_base::in | ios_base::out: - __mdstr = "r+"; - break; - case ios_base::in | ios_base::out | ios_base::trunc: - __mdstr = "w+"; - break; - case ios_base::in | ios_base::out | ios_base::app: - case ios_base::in | ios_base::app: - __mdstr = "a+"; - break; - case ios_base::out | ios_base::binary: - case ios_base::out | ios_base::trunc | ios_base::binary: - __mdstr = "wb"; - break; - case ios_base::out | ios_base::app | ios_base::binary: - case ios_base::app | ios_base::binary: - __mdstr = "ab"; - break; - case ios_base::in | ios_base::binary: - __mdstr = "rb"; - break; - case ios_base::in | ios_base::out | ios_base::binary: - __mdstr = "r+b"; - break; - case ios_base::in | ios_base::out | ios_base::trunc | ios_base::binary: - __mdstr = "w+b"; - break; - case ios_base::in | ios_base::out | ios_base::app | ios_base::binary: - case ios_base::in | ios_base::app | ios_base::binary: - __mdstr = "a+b"; - break; - default: - __rt = 0; - break; - } - if (__rt) - { - __file_ = fopen(__s, __mdstr); - if (__file_) - { - __om_ = __mode; - if (__mode & ios_base::ate) - { - if (fseek(__file_, 0, SEEK_END)) - { - fclose(__file_); - __file_ = 0; - __rt = 0; - } - } + __file_ = fopen(__s, __mdstr); + if (__file_) { + __om_ = __mode; + if (__mode & ios_base::ate) { + if (fseek(__file_, 0, SEEK_END)) { + fclose(__file_); + __file_ = 0; + __rt = 0; } - else - __rt = 0; - } + } + } else + __rt = 0; + } } return __rt; } +template +_LIBCPP_INLINE_VISIBILITY basic_filebuf<_CharT, _Traits>* +basic_filebuf<_CharT, _Traits>::__open(int __fd, ios_base::openmode __mode) { + basic_filebuf<_CharT, _Traits>* __rt = 0; + if (__file_ == 0) { + if (const char* __mdstr = __make_mdstring(__mode)) { + __rt = this; + __file_ = fdopen(__fd, __mdstr); + if (__file_) { + __om_ = __mode; + if (__mode & ios_base::ate) { + if (fseek(__file_, 0, SEEK_END)) { + fclose(__file_); + __file_ = 0; + __rt = 0; + } + } + } else + __rt = 0; + } + } + return __rt; +} + #ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR // This is basically the same as the char* overload except that it uses _wfopen // and long mode strings. 
@@ -1110,6 +1150,11 @@ public: #endif _LIBCPP_INLINE_VISIBILITY explicit basic_ifstream(const string& __s, ios_base::openmode __mode = ios_base::in); +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_INLINE_VISIBILITY + explicit basic_ifstream(const filesystem::path& __p, ios_base::openmode __mode = ios_base::in) + : basic_ifstream(__p.c_str(), __mode) {} +#endif // _LIBCPP_STD_VER >= 17 #endif #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY @@ -1131,6 +1176,16 @@ public: void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::in); #endif void open(const string& __s, ios_base::openmode __mode = ios_base::in); +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_INLINE_VISIBILITY + void open(const filesystem::path& __p, + ios_base::openmode __mode = ios_base::in) { + return open(__p.c_str(), __mode); + } +#endif // _LIBCPP_STD_VER >= 17 + + _LIBCPP_INLINE_VISIBILITY + void __open(int __fd, ios_base::openmode __mode); #endif _LIBCPP_INLINE_VISIBILITY void close(); @@ -1265,6 +1320,15 @@ basic_ifstream<_CharT, _Traits>::open(const string& __s, ios_base::openmode __mo else this->setstate(ios_base::failbit); } + +template +void basic_ifstream<_CharT, _Traits>::__open(int __fd, + ios_base::openmode __mode) { + if (__sb_.__open(__fd, __mode | ios_base::in)) + this->clear(); + else + this->setstate(ios_base::failbit); +} #endif template @@ -1299,6 +1363,13 @@ public: #endif _LIBCPP_INLINE_VISIBILITY explicit basic_ofstream(const string& __s, ios_base::openmode __mode = ios_base::out); + +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_INLINE_VISIBILITY + explicit basic_ofstream(const filesystem::path& __p, ios_base::openmode __mode = ios_base::out) + : basic_ofstream(__p.c_str(), __mode) {} +#endif // _LIBCPP_STD_VER >= 17 + #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_ofstream(basic_ofstream&& __rhs); @@ -1319,6 +1390,15 @@ public: void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::out); #endif void open(const string& __s, ios_base::openmode __mode = ios_base::out); + +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_INLINE_VISIBILITY + void open(const filesystem::path& __p, ios_base::openmode __mode = ios_base::out) + { return open(__p.c_str(), __mode); } +#endif // _LIBCPP_STD_VER >= 17 + + _LIBCPP_INLINE_VISIBILITY + void __open(int __fd, ios_base::openmode __mode); #endif _LIBCPP_INLINE_VISIBILITY void close(); @@ -1453,6 +1533,15 @@ basic_ofstream<_CharT, _Traits>::open(const string& __s, ios_base::openmode __mo else this->setstate(ios_base::failbit); } + +template +void basic_ofstream<_CharT, _Traits>::__open(int __fd, + ios_base::openmode __mode) { + if (__sb_.__open(__fd, __mode | ios_base::out)) + this->clear(); + else + this->setstate(ios_base::failbit); +} #endif template @@ -1488,6 +1577,13 @@ public: #endif _LIBCPP_INLINE_VISIBILITY explicit basic_fstream(const string& __s, ios_base::openmode __mode = ios_base::in | ios_base::out); + +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_INLINE_VISIBILITY + explicit basic_fstream(const filesystem::path& __p, ios_base::openmode __mode = ios_base::in | ios_base::out) + : basic_fstream(__p.c_str(), __mode) {} +#endif // _LIBCPP_STD_VER >= 17 + #endif #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY @@ -1509,6 +1605,13 @@ public: void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::in | ios_base::out); #endif void open(const string& __s, ios_base::openmode __mode = ios_base::in | ios_base::out); + +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_INLINE_VISIBILITY + void open(const filesystem::path& __p, ios_base::openmode __mode = 
ios_base::in|ios_base::out) + { return open(__p.c_str(), __mode); } +#endif // _LIBCPP_STD_VER >= 17 + #endif _LIBCPP_INLINE_VISIBILITY void close(); diff --git a/include/c++/v1/functional b/include/c++/v1/functional index e45beb8..61c87b0 100644 --- a/include/c++/v1/functional +++ b/include/c++/v1/functional @@ -183,7 +183,7 @@ struct bit_xor : unary_function }; template -class unary_negate +class unary_negate // deprecated in C++17 : public unary_function { public: @@ -191,10 +191,11 @@ public: bool operator()(const typename Predicate::argument_type& x) const; }; -template unary_negate not1(const Predicate& pred); +template // deprecated in C++17 +unary_negate not1(const Predicate& pred); template -class binary_negate +class binary_negate // deprecated in C++17 : public binary_function @@ -205,7 +206,8 @@ public: const typename Predicate::second_argument_type& y) const; }; -template binary_negate not2(const Predicate& pred); +template // deprecated in C++17 +binary_negate not2(const Predicate& pred); template unspecified not_fn(F&& f); // C++17 @@ -487,6 +489,7 @@ POLICY: For non-variadic implementations, the number of arguments is limited #include #include #include +#include #include <__functional_base> @@ -980,7 +983,7 @@ struct _LIBCPP_TEMPLATE_VIS bit_not #endif template -class _LIBCPP_TEMPLATE_VIS unary_negate +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 unary_negate : public unary_function { _Predicate __pred_; @@ -994,19 +997,19 @@ public: }; template -inline _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX17 inline _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY unary_negate<_Predicate> not1(const _Predicate& __pred) {return unary_negate<_Predicate>(__pred);} template -class _LIBCPP_TEMPLATE_VIS binary_negate +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 binary_negate : public binary_function { _Predicate __pred_; public: - _LIBCPP_INLINE_VISIBILITY explicit _LIBCPP_CONSTEXPR_AFTER_CXX11 + _LIBCPP_INLINE_VISIBILITY explicit _LIBCPP_CONSTEXPR_AFTER_CXX11 binary_negate(const _Predicate& __pred) : __pred_(__pred) {} _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY @@ -1016,13 +1019,13 @@ public: }; template -inline _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX17 inline _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY binary_negate<_Predicate> not2(const _Predicate& __pred) {return binary_negate<_Predicate>(__pred);} #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) template -class _LIBCPP_TEMPLATE_VIS binder1st +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binder1st : public unary_function { @@ -1042,13 +1045,13 @@ public: }; template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY binder1st<__Operation> bind1st(const __Operation& __op, const _Tp& __x) {return binder1st<__Operation>(__op, __x);} template -class _LIBCPP_TEMPLATE_VIS binder2nd +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binder2nd : public unary_function { @@ -1068,13 +1071,13 @@ public: }; template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY binder2nd<__Operation> bind2nd(const __Operation& __op, const _Tp& __x) {return binder2nd<__Operation>(__op, __x);} template -class _LIBCPP_TEMPLATE_VIS pointer_to_unary_function +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 pointer_to_unary_function : public unary_function<_Arg, _Result> { _Result (*__f_)(_Arg); 
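The `<functional>` hunks above are annotation-only: `unary_negate`/`binary_negate` and `not1`/`not2` gain `_LIBCPP_DEPRECATED_IN_CXX17`, while the old binders (`binder1st`, `bind1st`, `pointer_to_unary_function`, ...) gain `_LIBCPP_DEPRECATED_IN_CXX11`. Nothing changes at runtime; callers just get deprecation warnings pointing them at the modern replacements. A sketch of the migration (my example, not from the diff):

```cpp
#include <algorithm>
#include <functional>
#include <vector>

int main() {
    std::vector<int> v{1, 2, 3, 4, 5};
    auto is_even = [](int x) { return x % 2 == 0; };

    // std::not1/std::unary_negate now warn in C++17 builds;
    // std::not_fn (C++17) is the drop-in replacement:
    auto odds = std::count_if(v.begin(), v.end(), std::not_fn(is_even));

    // Likewise bind1st/binder1st are marked deprecated-in-C++11;
    // a lambda (or std::bind) takes their place:
    auto small = std::count_if(v.begin(), v.end(),
                               [](int x) { return x < 3; });

    return static_cast<int>(odds + small); // 3 + 2
}
```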
@@ -1086,13 +1089,13 @@ public: }; template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY pointer_to_unary_function<_Arg,_Result> ptr_fun(_Result (*__f)(_Arg)) {return pointer_to_unary_function<_Arg,_Result>(__f);} template -class _LIBCPP_TEMPLATE_VIS pointer_to_binary_function +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 pointer_to_binary_function : public binary_function<_Arg1, _Arg2, _Result> { _Result (*__f_)(_Arg1, _Arg2); @@ -1104,13 +1107,14 @@ public: }; template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY pointer_to_binary_function<_Arg1,_Arg2,_Result> ptr_fun(_Result (*__f)(_Arg1,_Arg2)) {return pointer_to_binary_function<_Arg1,_Arg2,_Result>(__f);} template -class _LIBCPP_TEMPLATE_VIS mem_fun_t : public unary_function<_Tp*, _Sp> +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_t + : public unary_function<_Tp*, _Sp> { _Sp (_Tp::*__p_)(); public: @@ -1121,7 +1125,8 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS mem_fun1_t : public binary_function<_Tp*, _Ap, _Sp> +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_t + : public binary_function<_Tp*, _Ap, _Sp> { _Sp (_Tp::*__p_)(_Ap); public: @@ -1132,19 +1137,20 @@ public: }; template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY mem_fun_t<_Sp,_Tp> mem_fun(_Sp (_Tp::*__f)()) {return mem_fun_t<_Sp,_Tp>(__f);} template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY mem_fun1_t<_Sp,_Tp,_Ap> mem_fun(_Sp (_Tp::*__f)(_Ap)) {return mem_fun1_t<_Sp,_Tp,_Ap>(__f);} template -class _LIBCPP_TEMPLATE_VIS mem_fun_ref_t : public unary_function<_Tp, _Sp> +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_ref_t + : public unary_function<_Tp, _Sp> { _Sp (_Tp::*__p_)(); public: @@ -1155,7 +1161,8 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS mem_fun1_ref_t : public binary_function<_Tp, _Ap, _Sp> +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_ref_t + : public binary_function<_Tp, _Ap, _Sp> { _Sp (_Tp::*__p_)(_Ap); public: @@ -1166,19 +1173,20 @@ public: }; template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY mem_fun_ref_t<_Sp,_Tp> mem_fun_ref(_Sp (_Tp::*__f)()) {return mem_fun_ref_t<_Sp,_Tp>(__f);} template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY mem_fun1_ref_t<_Sp,_Tp,_Ap> mem_fun_ref(_Sp (_Tp::*__f)(_Ap)) {return mem_fun1_ref_t<_Sp,_Tp,_Ap>(__f);} template -class _LIBCPP_TEMPLATE_VIS const_mem_fun_t : public unary_function +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_t + : public unary_function { _Sp (_Tp::*__p_)() const; public: @@ -1189,7 +1197,8 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS const_mem_fun1_t : public binary_function +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_t + : public binary_function { _Sp (_Tp::*__p_)(_Ap) const; public: @@ -1200,19 +1209,20 @@ public: }; template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY const_mem_fun_t<_Sp,_Tp> mem_fun(_Sp (_Tp::*__f)() const) {return const_mem_fun_t<_Sp,_Tp>(__f);} template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY const_mem_fun1_t<_Sp,_Tp,_Ap> mem_fun(_Sp (_Tp::*__f)(_Ap) const) {return const_mem_fun1_t<_Sp,_Tp,_Ap>(__f);} template -class 
_LIBCPP_TEMPLATE_VIS const_mem_fun_ref_t : public unary_function<_Tp, _Sp> +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_ref_t + : public unary_function<_Tp, _Sp> { _Sp (_Tp::*__p_)() const; public: @@ -1223,7 +1233,7 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS const_mem_fun1_ref_t +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_ref_t : public binary_function<_Tp, _Ap, _Sp> { _Sp (_Tp::*__p_)(_Ap) const; @@ -1235,13 +1245,13 @@ public: }; template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY const_mem_fun_ref_t<_Sp,_Tp> mem_fun_ref(_Sp (_Tp::*__f)() const) {return const_mem_fun_ref_t<_Sp,_Tp>(__f);} template -inline _LIBCPP_INLINE_VISIBILITY +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY const_mem_fun1_ref_t<_Sp,_Tp,_Ap> mem_fun_ref(_Sp (_Tp::*__f)(_Ap) const) {return const_mem_fun1_ref_t<_Sp,_Tp,_Ap>(__f);} @@ -1399,13 +1409,13 @@ public: #endif }; -_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY void __throw_bad_function_call() { #ifndef _LIBCPP_NO_EXCEPTIONS throw bad_function_call(); #else - _VSTD::abort(); + _VSTD::abort(); #endif } @@ -1790,7 +1800,7 @@ function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc& __a0, _Fp _ typedef __function::__func<_Fp, _Alloc, _Rp(_ArgTypes...)> _FF; typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap; _Ap __a(__a0); - if (sizeof(_FF) <= sizeof(__buf_) && + if (sizeof(_FF) <= sizeof(__buf_) && is_nothrow_copy_constructible<_Fp>::value && is_nothrow_copy_constructible<_Ap>::value) { __f_ = ::new((void*)&__buf_) _FF(_VSTD::move(__f), _Alloc(__a)); @@ -1818,11 +1828,7 @@ template function<_Rp(_ArgTypes...)>& function<_Rp(_ArgTypes...)>::operator=(function&& __f) _NOEXCEPT { - if ((void *)__f_ == &__buf_) - __f_->destroy(); - else if (__f_) - __f_->destroy_deallocate(); - __f_ = 0; + *this = nullptr; if (__f.__f_ == 0) __f_ = 0; else if ((void *)__f.__f_ == &__f.__buf_) @@ -1842,11 +1848,12 @@ template function<_Rp(_ArgTypes...)>& function<_Rp(_ArgTypes...)>::operator=(nullptr_t) _NOEXCEPT { - if ((void *)__f_ == &__buf_) - __f_->destroy(); - else if (__f_) - __f_->destroy_deallocate(); + __base* __t = __f_; __f_ = 0; + if ((void *)__t == &__buf_) + __t->destroy(); + else if (__t) + __t->destroy_deallocate(); return *this; } @@ -2008,7 +2015,7 @@ namespace placeholders template struct __ph {}; -#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_BIND) +#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) _LIBCPP_FUNC_VIS extern const __ph<1> _1; _LIBCPP_FUNC_VIS extern const __ph<2> _2; _LIBCPP_FUNC_VIS extern const __ph<3> _3; @@ -2030,7 +2037,7 @@ _LIBCPP_FUNC_VIS extern const __ph<10> _10; /* _LIBCPP_INLINE_VAR */ constexpr __ph<8> _8{}; /* _LIBCPP_INLINE_VAR */ constexpr __ph<9> _9{}; /* _LIBCPP_INLINE_VAR */ constexpr __ph<10> _10{}; -#endif // defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_BIND) +#endif // defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) } // placeholders @@ -2108,53 +2115,53 @@ __mu(_Ti& __ti, _Uj&) template -struct ____mu_return; +struct __mu_return_impl; template -struct ____mu_return_invokable // false +struct __mu_return_invokable // false { typedef __nat type; }; template -struct ____mu_return_invokable +struct __mu_return_invokable { typedef typename __invoke_of<_Ti&, _Uj...>::type type; }; template -struct ____mu_return<_Ti, false, true, false, tuple<_Uj...> > - : public 
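The two `function::operator=` hunks above are a behavioral fix, not a cleanup: move-assignment now delegates to `*this = nullptr;`, and nullptr-assignment detaches the stored target (`__f_ = 0`) *before* running its destructor. That way, a callable whose destructor re-enters the same `std::function` observes it empty instead of pointing at a half-destroyed target. My reconstruction of the scenario being fixed (assuming the usual "empty during reset" expectation, not code from the diff):

```cpp
#include <cassert>
#include <functional>

std::function<void()> g;
bool resetting = false;

struct Probe {
    void operator()() const {}
    ~Probe() {
        // Runs while g is being reset; with the reordered destroy, g has
        // already been detached, so it correctly reads as empty.
        if (resetting)
            assert(!g);
    }
};

int main() {
    g = Probe{};     // temporaries destroyed here too, hence the flag
    resetting = true;
    g = nullptr;     // destroys the stored Probe; its dtor sees !g
}
```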
____mu_return_invokable<__invokable<_Ti&, _Uj...>::value, _Ti, _Uj...> +struct __mu_return_impl<_Ti, false, true, false, tuple<_Uj...> > + : public __mu_return_invokable<__invokable<_Ti&, _Uj...>::value, _Ti, _Uj...> { }; template -struct ____mu_return<_Ti, false, false, true, _TupleUj> +struct __mu_return_impl<_Ti, false, false, true, _TupleUj> { typedef typename tuple_element::value - 1, _TupleUj>::type&& type; }; template -struct ____mu_return<_Ti, true, false, false, _TupleUj> +struct __mu_return_impl<_Ti, true, false, false, _TupleUj> { typedef typename _Ti::type& type; }; template -struct ____mu_return<_Ti, false, false, false, _TupleUj> +struct __mu_return_impl<_Ti, false, false, false, _TupleUj> { typedef _Ti& type; }; template struct __mu_return - : public ____mu_return<_Ti, - __is_reference_wrapper<_Ti>::value, - is_bind_expression<_Ti>::value, - 0 < is_placeholder<_Ti>::value && - is_placeholder<_Ti>::value <= tuple_size<_TupleUj>::value, - _TupleUj> + : public __mu_return_impl<_Ti, + __is_reference_wrapper<_Ti>::value, + is_bind_expression<_Ti>::value, + 0 < is_placeholder<_Ti>::value && + is_placeholder<_Ti>::value <= tuple_size<_TupleUj>::value, + _TupleUj> { }; @@ -2343,8 +2350,6 @@ bind(_Fp&& __f, _BoundArgs&&... __bound_args) #if _LIBCPP_STD_VER > 14 -#define __cpp_lib_invoke 201411 - template result_of_t<_Fn&&(_Args&&...)> invoke(_Fn&& __f, _Args&&... __args) @@ -2500,7 +2505,7 @@ template> class _LIBCPP_TYPE_VIS default_searcher { public: _LIBCPP_INLINE_VISIBILITY - default_searcher(_ForwardIterator __f, _ForwardIterator __l, + default_searcher(_ForwardIterator __f, _ForwardIterator __l, _BinaryPredicate __p = _BinaryPredicate()) : __first_(__f), __last_(__l), __pred_(__p) {} diff --git a/include/c++/v1/future b/include/c++/v1/future index 536574e..b3ffc7e 100644 --- a/include/c++/v1/future +++ b/include/c++/v1/future @@ -409,7 +409,7 @@ _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(launch) #ifndef _LIBCPP_HAS_NO_STRONG_ENUMS -#ifdef _LIBCXX_UNDERLYING_TYPE +#ifdef _LIBCPP_UNDERLYING_TYPE typedef underlying_type::type __launch_underlying_type; #else typedef int __launch_underlying_type; @@ -514,7 +514,7 @@ public: virtual ~future_error() _NOEXCEPT; }; -_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY #ifndef _LIBCPP_NO_EXCEPTIONS _LIBCPP_AVAILABILITY_FUTURE_ERROR #endif @@ -556,13 +556,14 @@ public: {return (__state_ & __constructed) || (__exception_ != nullptr);} _LIBCPP_INLINE_VISIBILITY - void __set_future_attached() - { + void __attach_future() { lock_guard __lk(__mut_); + bool __has_future_attached = (__state_ & __future_attached) != 0; + if (__has_future_attached) + __throw_future_error(future_errc::future_already_retrieved); + this->__add_shared(); __state_ |= __future_attached; } - _LIBCPP_INLINE_VISIBILITY - bool __has_future_attached() const {return (__state_ & __future_attached) != 0;} _LIBCPP_INLINE_VISIBILITY void __set_deferred() {__state_ |= deferred;} @@ -1154,10 +1155,7 @@ template future<_Rp>::future(__assoc_state<_Rp>* __state) : __state_(__state) { - if (__state_->__has_future_attached()) - __throw_future_error(future_errc::future_already_retrieved); - __state_->__add_shared(); - __state_->__set_future_attached(); + __state_->__attach_future(); } struct __release_shared_count @@ -1257,10 +1255,7 @@ template future<_Rp&>::future(__assoc_state<_Rp&>* __state) : __state_(__state) { - if (__state_->__has_future_attached()) - __throw_future_error(future_errc::future_already_retrieved); - __state_->__add_shared(); - 
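Also worth calling out in the `<future>` hunk: the old split between `__has_future_attached()` (checked by the caller) and `__set_future_attached()` left a window where two racing `get_future()` calls could both pass the check. The new `__attach_future()` folds the check, the refcount bump, and the flag set into one critical section under `__mut_`. The contract it enforces, illustrated (my example):

```cpp
#include <future>
#include <iostream>

int main() {
    std::promise<int> p;
    std::future<int> f1 = p.get_future();
    try {
        auto f2 = p.get_future(); // second retrieval must throw
        (void)f2;
    } catch (const std::future_error& e) {
        std::cout << (e.code() == std::future_errc::future_already_retrieved)
                  << '\n'; // 1
    }
}
```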
__state_->__set_future_attached(); + __state_->__attach_future(); } template diff --git a/include/c++/v1/initializer_list b/include/c++/v1/initializer_list index 8c234aa..b934637 100644 --- a/include/c++/v1/initializer_list +++ b/include/c++/v1/initializer_list @@ -61,7 +61,7 @@ class _LIBCPP_TEMPLATE_VIS initializer_list const _Ep* __begin_; size_t __size_; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 initializer_list(const _Ep* __b, size_t __s) _NOEXCEPT : __begin_(__b), @@ -76,19 +76,19 @@ public: typedef const _Ep* iterator; typedef const _Ep* const_iterator; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 initializer_list() _NOEXCEPT : __begin_(nullptr), __size_(0) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 size_t size() const _NOEXCEPT {return __size_;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 const _Ep* begin() const _NOEXCEPT {return __begin_;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 const _Ep* end() const _NOEXCEPT {return __begin_ + __size_;} }; diff --git a/include/c++/v1/iomanip b/include/c++/v1/iomanip index a6bee73..36c1116 100644 --- a/include/c++/v1/iomanip +++ b/include/c++/v1/iomanip @@ -46,6 +46,7 @@ template #include <__config> #include <__string> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/include/c++/v1/ios b/include/c++/v1/ios index 6e32d0f..040b2d4 100644 --- a/include/c++/v1/ios +++ b/include/c++/v1/ios @@ -337,9 +337,9 @@ protected: } void init(void* __sb); - _LIBCPP_ALWAYS_INLINE void* rdbuf() const {return __rdbuf_;} + _LIBCPP_INLINE_VISIBILITY void* rdbuf() const {return __rdbuf_;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void rdbuf(void* __sb) { __rdbuf_ = __sb; @@ -351,7 +351,7 @@ protected: void move(ios_base&); void swap(ios_base&) _NOEXCEPT; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void set_rdbuf(void* __sb) { __rdbuf_ = __sb; @@ -599,26 +599,26 @@ public: // we give it internal linkage. #if defined(_LIBCPP_CXX03_LANG) - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY operator __cxx03_bool::__bool_type() const { return !fail() ? 
&__cxx03_bool::__true_value : nullptr; } #else - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_EXPLICIT operator bool() const {return !fail();} #endif - _LIBCPP_ALWAYS_INLINE bool operator!() const {return fail();} - _LIBCPP_ALWAYS_INLINE iostate rdstate() const {return ios_base::rdstate();} - _LIBCPP_ALWAYS_INLINE void clear(iostate __state = goodbit) {ios_base::clear(__state);} - _LIBCPP_ALWAYS_INLINE void setstate(iostate __state) {ios_base::setstate(__state);} - _LIBCPP_ALWAYS_INLINE bool good() const {return ios_base::good();} - _LIBCPP_ALWAYS_INLINE bool eof() const {return ios_base::eof();} - _LIBCPP_ALWAYS_INLINE bool fail() const {return ios_base::fail();} - _LIBCPP_ALWAYS_INLINE bool bad() const {return ios_base::bad();} + _LIBCPP_INLINE_VISIBILITY bool operator!() const {return fail();} + _LIBCPP_INLINE_VISIBILITY iostate rdstate() const {return ios_base::rdstate();} + _LIBCPP_INLINE_VISIBILITY void clear(iostate __state = goodbit) {ios_base::clear(__state);} + _LIBCPP_INLINE_VISIBILITY void setstate(iostate __state) {ios_base::setstate(__state);} + _LIBCPP_INLINE_VISIBILITY bool good() const {return ios_base::good();} + _LIBCPP_INLINE_VISIBILITY bool eof() const {return ios_base::eof();} + _LIBCPP_INLINE_VISIBILITY bool fail() const {return ios_base::fail();} + _LIBCPP_INLINE_VISIBILITY bool bad() const {return ios_base::bad();} - _LIBCPP_ALWAYS_INLINE iostate exceptions() const {return ios_base::exceptions();} - _LIBCPP_ALWAYS_INLINE void exceptions(iostate __iostate) {ios_base::exceptions(__iostate);} + _LIBCPP_INLINE_VISIBILITY iostate exceptions() const {return ios_base::exceptions();} + _LIBCPP_INLINE_VISIBILITY void exceptions(iostate __iostate) {ios_base::exceptions(__iostate);} // 27.5.4.1 Constructor/destructor: _LIBCPP_INLINE_VISIBILITY @@ -652,7 +652,7 @@ public: char_type widen(char __c) const; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY basic_ios() {// purposefully does no initialization } _LIBCPP_INLINE_VISIBILITY @@ -661,7 +661,7 @@ protected: _LIBCPP_INLINE_VISIBILITY void move(basic_ios& __rhs); #ifndef _LIBCPP_CXX03_LANG - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void move(basic_ios&& __rhs) {move(__rhs);} #endif _LIBCPP_INLINE_VISIBILITY diff --git a/include/c++/v1/istream b/include/c++/v1/istream index f2579c1..71c162b 100644 --- a/include/c++/v1/istream +++ b/include/c++/v1/istream @@ -358,381 +358,162 @@ basic_istream<_CharT, _Traits>::~basic_istream() { } -template +template +_LIBCPP_INLINE_VISIBILITY basic_istream<_CharT, _Traits>& -basic_istream<_CharT, _Traits>::operator>>(unsigned short& __n) -{ +__input_arithmetic(basic_istream<_CharT, _Traits>& __is, _Tp& __n) { #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); + typename basic_istream<_CharT, _Traits>::sentry __s(__is); if (__s) { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; + typedef istreambuf_iterator<_CharT, _Traits> _Ip; + typedef num_get<_CharT, _Ip> _Fp; ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); + use_facet<_Fp>(__is.getloc()).get(_Ip(__is), _Ip(), __is, __err, __n); + __is.setstate(__err); } #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) 
{ - this->__set_badbit_and_consider_rethrow(); + __is.__set_badbit_and_consider_rethrow(); } #endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return __is; +} + +template +basic_istream<_CharT, _Traits>& +basic_istream<_CharT, _Traits>::operator>>(unsigned short& __n) +{ + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(unsigned int& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(long& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(unsigned long& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(long long& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(unsigned long long& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) 
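The `<istream>` change above is pure deduplication: roughly ten hand-expanded `operator>>` bodies (sentry, `num_get` facet lookup, iostate propagation, badbit-on-exception) collapse into the single `__input_arithmetic` helper introduced at the top of the hunk. Extraction semantics are unchanged, e.g. (my quick behavioral check, not from the diff):

```cpp
#include <cassert>
#include <sstream>

int main() {
    std::istringstream in("42 3.5 w");
    unsigned long u = 0;
    double d = 0;
    in >> u >> d;                 // both route through __input_arithmetic now
    assert(u == 42 && d == 3.5);
    in >> u;                      // 'w' is not a number -> failbit
    assert(in.fail());
}
```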
- { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(float& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(double& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(long double& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(bool& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(void*& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __n); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) 
- { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic(*this, __n); } -template +template +_LIBCPP_INLINE_VISIBILITY basic_istream<_CharT, _Traits>& -basic_istream<_CharT, _Traits>::operator>>(short& __n) -{ +__input_arithmetic_with_numeric_limits(basic_istream<_CharT, _Traits>& __is, _Tp& __n) { #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); + typename basic_istream<_CharT, _Traits>::sentry __s(__is); if (__s) { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; + typedef istreambuf_iterator<_CharT, _Traits> _Ip; + typedef num_get<_CharT, _Ip> _Fp; ios_base::iostate __err = ios_base::goodbit; long __temp; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __temp); - if (__temp < numeric_limits::min()) + use_facet<_Fp>(__is.getloc()).get(_Ip(__is), _Ip(), __is, __err, __temp); + if (__temp < numeric_limits<_Tp>::min()) { __err |= ios_base::failbit; - __n = numeric_limits::min(); + __n = numeric_limits<_Tp>::min(); } - else if (__temp > numeric_limits::max()) + else if (__temp > numeric_limits<_Tp>::max()) { __err |= ios_base::failbit; - __n = numeric_limits::max(); + __n = numeric_limits<_Tp>::max(); } else - __n = static_cast(__temp); - this->setstate(__err); + __n = static_cast<_Tp>(__temp); + __is.setstate(__err); } #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { - this->__set_badbit_and_consider_rethrow(); + __is.__set_badbit_and_consider_rethrow(); } #endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return __is; +} + +template +basic_istream<_CharT, _Traits>& +basic_istream<_CharT, _Traits>::operator>>(short& __n) +{ + return _VSTD::__input_arithmetic_with_numeric_limits(*this, __n); } template basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::operator>>(int& __n) { -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - sentry __s(*this); - if (__s) - { - typedef istreambuf_iterator _Ip; - typedef num_get _Fp; - ios_base::iostate __err = ios_base::goodbit; - long __temp; - use_facet<_Fp>(this->getloc()).get(_Ip(*this), _Ip(), *this, __err, __temp); - if (__temp < numeric_limits::min()) - { - __err |= ios_base::failbit; - __n = numeric_limits::min(); - } - else if (__temp > numeric_limits::max()) - { - __err |= ios_base::failbit; - __n = numeric_limits::max(); - } - else - __n = static_cast(__temp); - this->setstate(__err); - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) 
- { - this->__set_badbit_and_consider_rethrow(); - } -#endif // _LIBCPP_NO_EXCEPTIONS - return *this; + return _VSTD::__input_arithmetic_with_numeric_limits(*this, __n); } template diff --git a/include/c++/v1/iterator b/include/c++/v1/iterator index 8b887db..aed0525 100644 --- a/include/c++/v1/iterator +++ b/include/c++/v1/iterator @@ -418,6 +418,7 @@ template constexpr const E* data(initializer_list il) noexcept; #include #include #include +#include #ifdef __APPLE__ #include #endif @@ -1217,38 +1218,38 @@ make_move_iterator(_Iter __i) template class __wrap_iter; template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator==(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator<(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator!=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator>(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator>=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator<=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; #ifndef _LIBCPP_CXX03_LANG template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG -> decltype(__x.base() - __y.base()); @@ -1260,7 +1261,7 @@ operator-(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBU #endif template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter<_Iter> operator+(typename __wrap_iter<_Iter>::difference_type, __wrap_iter<_Iter>) _NOEXCEPT_DEBUG; @@ -1272,7 +1273,7 @@ template _B2 _LIBCPP_INLINE_VISIBILITY move_backward(_B1, #if _LIBCPP_DEBUG_LEVEL < 2 template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1283,7 +1284,7 @@ __unwrap_iter(__wrap_iter<_Tp*>); #else template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1306,7 +1307,7 @@ public: private: iterator_type __i; public: - _LIBCPP_INLINE_VISIBILITY __wrap_iter() _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter() _NOEXCEPT_DEBUG #if _LIBCPP_STD_VER > 11 : __i{} #endif @@ -1315,22 +1316,23 @@ public: __get_db()->__insert_i(this); #endif } - template _LIBCPP_INLINE_VISIBILITY __wrap_iter(const __wrap_iter<_Up>& __u, - typename enable_if::value>::type* = 0) _NOEXCEPT_DEBUG - : __i(__u.base()) + template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + __wrap_iter(const __wrap_iter<_Up>& __u, + typename enable_if::value>::type* = 0) _NOEXCEPT_DEBUG + : __i(__u.base()) { #if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__iterator_copy(this, &__u); #endif } #if _LIBCPP_DEBUG_LEVEL >= 2 - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY 
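`short` and `int` get their own helper, `__input_arithmetic_with_numeric_limits`, because per [istream.formatted.arithmetic] they are extracted through a `long` and then range-checked: out-of-range input sets failbit and stores the clamped `numeric_limits<_Tp>` bound. The templated helper preserves exactly the behavior of the two deleted bodies. For example (my illustration):

```cpp
#include <iostream>
#include <limits>
#include <sstream>

int main() {
    std::istringstream in("70000"); // fits in long, overflows short
    short s = 0;
    in >> s;
    std::cout << in.fail() << ' '
              << (s == std::numeric_limits<short>::max()) << '\n'; // 1 1
}
```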
_LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(const __wrap_iter& __x) : __i(__x.base()) { __get_db()->__iterator_copy(this, &__x); } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator=(const __wrap_iter& __x) { if (this != &__x) @@ -1340,13 +1342,13 @@ public: } return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG ~__wrap_iter() { __get_db()->__erase_i(this); } #endif - _LIBCPP_INLINE_VISIBILITY reference operator*() const _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator*() const _NOEXCEPT_DEBUG { #if _LIBCPP_DEBUG_LEVEL >= 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), @@ -1354,7 +1356,7 @@ public: #endif return *__i; } - _LIBCPP_INLINE_VISIBILITY pointer operator->() const _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG pointer operator->() const _NOEXCEPT_DEBUG { #if _LIBCPP_DEBUG_LEVEL >= 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), @@ -1362,7 +1364,7 @@ public: #endif return (pointer)_VSTD::addressof(*__i); } - _LIBCPP_INLINE_VISIBILITY __wrap_iter& operator++() _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator++() _NOEXCEPT_DEBUG { #if _LIBCPP_DEBUG_LEVEL >= 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), @@ -1371,9 +1373,10 @@ public: ++__i; return *this; } - _LIBCPP_INLINE_VISIBILITY __wrap_iter operator++(int) _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator++(int) _NOEXCEPT_DEBUG {__wrap_iter __tmp(*this); ++(*this); return __tmp;} - _LIBCPP_INLINE_VISIBILITY __wrap_iter& operator--() _NOEXCEPT_DEBUG + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator--() _NOEXCEPT_DEBUG { #if _LIBCPP_DEBUG_LEVEL >= 2 _LIBCPP_ASSERT(__get_const_db()->__decrementable(this), @@ -1382,11 +1385,11 @@ public: --__i; return *this; } - _LIBCPP_INLINE_VISIBILITY __wrap_iter operator--(int) _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator--(int) _NOEXCEPT_DEBUG {__wrap_iter __tmp(*this); --(*this); return __tmp;} - _LIBCPP_INLINE_VISIBILITY __wrap_iter operator+ (difference_type __n) const _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator+ (difference_type __n) const _NOEXCEPT_DEBUG {__wrap_iter __w(*this); __w += __n; return __w;} - _LIBCPP_INLINE_VISIBILITY __wrap_iter& operator+=(difference_type __n) _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator+=(difference_type __n) _NOEXCEPT_DEBUG { #if _LIBCPP_DEBUG_LEVEL >= 2 _LIBCPP_ASSERT(__get_const_db()->__addable(this, __n), @@ -1395,11 +1398,11 @@ public: __i += __n; return *this; } - _LIBCPP_INLINE_VISIBILITY __wrap_iter operator- (difference_type __n) const _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator- (difference_type __n) const _NOEXCEPT_DEBUG {return *this + (-__n);} - _LIBCPP_INLINE_VISIBILITY __wrap_iter& operator-=(difference_type __n) _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator-=(difference_type __n) _NOEXCEPT_DEBUG {*this += -__n; return *this;} - _LIBCPP_INLINE_VISIBILITY reference operator[](difference_type __n) const _NOEXCEPT_DEBUG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator[](difference_type __n) const _NOEXCEPT_DEBUG { #if _LIBCPP_DEBUG_LEVEL >= 2 
_LIBCPP_ASSERT(__get_const_db()->__subscriptable(this, __n), @@ -1408,67 +1411,68 @@ public: return __i[__n]; } - _LIBCPP_INLINE_VISIBILITY iterator_type base() const _NOEXCEPT_DEBUG {return __i;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG iterator_type base() const _NOEXCEPT_DEBUG {return __i;} private: #if _LIBCPP_DEBUG_LEVEL >= 2 - _LIBCPP_INLINE_VISIBILITY __wrap_iter(const void* __p, iterator_type __x) : __i(__x) + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(const void* __p, iterator_type __x) : __i(__x) { __get_db()->__insert_ic(this, __p); } #else - _LIBCPP_INLINE_VISIBILITY __wrap_iter(iterator_type __x) _NOEXCEPT_DEBUG : __i(__x) {} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(iterator_type __x) _NOEXCEPT_DEBUG : __i(__x) {} #endif template friend class __wrap_iter; template friend class basic_string; template friend class _LIBCPP_TEMPLATE_VIS vector; + template friend class _LIBCPP_TEMPLATE_VIS span; template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend bool operator==(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend bool operator<(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend bool operator!=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend bool operator>(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend bool operator>=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend bool operator<=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; #ifndef _LIBCPP_CXX03_LANG template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG -> decltype(__x.base() - __y.base()); #else template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend typename __wrap_iter<_Iter1>::difference_type operator-(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT_DEBUG; #endif template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend __wrap_iter<_Iter1> operator+(typename __wrap_iter<_Iter1>::difference_type, __wrap_iter<_Iter1>) _NOEXCEPT_DEBUG; @@ -1479,7 +1483,7 @@ private: #if _LIBCPP_DEBUG_LEVEL < 2 template - friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1488,7 +1492,7 @@ private: __unwrap_iter(__wrap_iter<_Tp*>); #else template - inline _LIBCPP_INLINE_VISIBILITY + inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1499,7 +1503,7 @@ private: }; template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG { @@ -1507,7 +1511,7 @@ operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEX } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG { @@ -1519,7 +1523,7 @@ operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC } template -inline _LIBCPP_INLINE_VISIBILITY 
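The `_LIBCPP_CONSTEXPR_IF_NODEBUG` sweep over `__wrap_iter` (plus the new `span` friend declaration) makes these wrapped iterators usable in constant expressions whenever the debug-mode bookkeeping is compiled out, which `span` relies on. Purely illustrative of the kind of compile-time traversal being enabled (using `std::array`, whose iterators are already constexpr-friendly in C++17; not code from the diff):

```cpp
#include <array>

constexpr int sum(const std::array<int, 4>& a) {
    int s = 0;
    for (auto it = a.begin(); it != a.end(); ++it) // constexpr iteration
        s += *it;
    return s;
}
static_assert(sum({1, 2, 3, 4}) == 10, "iterator walk at compile time");
```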
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG { @@ -1527,7 +1531,7 @@ operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEX } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG { @@ -1535,7 +1539,7 @@ operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG { @@ -1543,7 +1547,7 @@ operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEX } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG { @@ -1551,7 +1555,7 @@ operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEX } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT_DEBUG { @@ -1559,7 +1563,7 @@ operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEX } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT_DEBUG { @@ -1567,7 +1571,7 @@ operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXC } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT_DEBUG { @@ -1575,7 +1579,7 @@ operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEX } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT_DEBUG { @@ -1584,7 +1588,7 @@ operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEX #ifndef _LIBCPP_CXX03_LANG template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG -> decltype(__x.base() - __y.base()) @@ -1597,7 +1601,7 @@ operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC } #else template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename __wrap_iter<_Iter1>::difference_type operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT_DEBUG { @@ -1610,7 +1614,7 @@ operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC #endif template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter<_Iter> operator+(typename __wrap_iter<_Iter>::difference_type __n, __wrap_iter<_Iter> __x) _NOEXCEPT_DEBUG diff --git a/include/c++/v1/list b/include/c++/v1/list index f884b16..8f0e1db 100644 --- a/include/c++/v1/list +++ b/include/c++/v1/list @@ -147,6 +147,11 @@ public: void 
reverse() noexcept; }; + +template ::value_type>> + list(InputIterator, InputIterator, Allocator = Allocator()) + -> list::value_type, Allocator>; // C++17 + template bool operator==(const list& x, const list& y); template @@ -176,6 +181,7 @@ template #include #include #include +#include #include <__debug> @@ -527,11 +533,12 @@ class __list_imp { __list_imp(const __list_imp&); __list_imp& operator=(const __list_imp&); -protected: - typedef _Tp value_type; +public: typedef _Alloc allocator_type; typedef allocator_traits __alloc_traits; typedef typename __alloc_traits::size_type size_type; +protected: + typedef _Tp value_type; typedef typename __alloc_traits::void_pointer __void_pointer; typedef __list_iterator iterator; typedef __list_const_iterator const_iterator; @@ -550,6 +557,9 @@ protected: typedef typename __rebind_alloc_helper<__alloc_traits, __node_base>::type __node_base_allocator; typedef typename allocator_traits<__node_base_allocator>::pointer __node_base_pointer; + static_assert((!is_same::value), + "internal allocator type must differ from user-specified " + "type; otherwise overload resolution breaks"); __node_base __end_; __compressed_pair __size_alloc_; @@ -584,6 +594,11 @@ protected: _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value); _LIBCPP_INLINE_VISIBILITY __list_imp(const allocator_type& __a); + _LIBCPP_INLINE_VISIBILITY + __list_imp(const __node_allocator& __a); +#ifndef _LIBCPP_CXX03_LANG + __list_imp(__node_allocator&& __a) _NOEXCEPT; +#endif ~__list_imp(); void clear() _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY @@ -707,9 +722,18 @@ __list_imp<_Tp, _Alloc>::__list_imp(const allocator_type& __a) } template -__list_imp<_Tp, _Alloc>::~__list_imp() -{ - clear(); +inline __list_imp<_Tp, _Alloc>::__list_imp(const __node_allocator& __a) + : __size_alloc_(0, __a) {} + +#ifndef _LIBCPP_CXX03_LANG +template +inline __list_imp<_Tp, _Alloc>::__list_imp(__node_allocator&& __a) _NOEXCEPT + : __size_alloc_(0, std::move(__a)) {} +#endif + +template +__list_imp<_Tp, _Alloc>::~__list_imp() { + clear(); #if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__erase_c(this); #endif @@ -1106,6 +1130,22 @@ private: void __move_assign(list& __c, false_type); }; +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template::value_type>, + class = typename enable_if<__is_allocator<_Alloc>::value, void>::type + > +list(_InputIterator, _InputIterator) + -> list::value_type, _Alloc>; + +template::value, void>::type + > +list(_InputIterator, _InputIterator, _Alloc) + -> list::value_type, _Alloc>; +#endif + // Link in nodes [__f, __l] just prior to __p template inline @@ -1226,10 +1266,8 @@ list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, const allocator_type& __a, template list<_Tp, _Alloc>::list(const list& __c) - : base(allocator_type( - __node_alloc_traits::select_on_container_copy_construction( - __c.__node_alloc()))) -{ + : base(__node_alloc_traits::select_on_container_copy_construction( + __c.__node_alloc())) { #if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__insert_c(this); #endif @@ -1274,11 +1312,9 @@ list<_Tp, _Alloc>::list(initializer_list __il) } template -inline -list<_Tp, _Alloc>::list(list&& __c) +inline list<_Tp, _Alloc>::list(list&& __c) _NOEXCEPT_(is_nothrow_move_constructible<__node_allocator>::value) - : base(allocator_type(_VSTD::move(__c.__node_alloc()))) -{ + : base(_VSTD::move(__c.__node_alloc())) { #if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__insert_c(this); #endif @@ -2173,7 +2209,7 @@ template void list<_Tp, _Alloc>::merge(list& __c, _Comp __comp) { - if (this != &__c) + if (this != 
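Two things in the `<list>` hunks deserve a note. First, C++17 deduction guides are added for the iterator-pair constructors (guarded by `_LIBCPP_HAS_NO_DEDUCTION_GUIDES` and constrained with `__is_allocator`). Second, `__list_imp` now takes the node allocator directly in new constructors, with a `static_assert` that the internal node allocator type differs from the user-specified allocator, since overload resolution between the two constructors depends on that distinction. Deduction in action (my example):

```cpp
#include <list>
#include <vector>

int main() {
    std::vector<int> v{1, 2, 3};
    std::list l(v.begin(), v.end()); // CTAD deduces std::list<int> (C++17)
    return static_cast<int>(l.size()); // 3
}
```

The `merge` hunk just below is the usual hardening idiom: comparing `this != _VSTD::addressof(__c)` instead of `this != &__c` stays correct even in the presence of an overloaded `operator&`.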
_VSTD::addressof(__c)) { iterator __f1 = begin(); iterator __e1 = end(); diff --git a/include/c++/v1/locale b/include/c++/v1/locale index 52885b7..e240799 100644 --- a/include/c++/v1/locale +++ b/include/c++/v1/locale @@ -573,81 +573,81 @@ public: typedef _CharT char_type; typedef _InputIterator iter_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit num_get(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long long& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned short& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned int& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long long& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, float& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, double& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long double& __v) const { return do_get(__b, __e, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const { @@ -657,7 +657,7 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~num_get() {} template @@ -1261,60 +1261,60 @@ public: typedef _CharT char_type; typedef _OutputIterator iter_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit num_put(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, bool __v) const { return do_put(__s, __iob, __fl, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, long __v) const { return do_put(__s, __iob, __fl, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, long long __v) const { return do_put(__s, __iob, __fl, __v); } - 
_LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long __v) const { return do_put(__s, __iob, __fl, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long long __v) const { return do_put(__s, __iob, __fl, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, double __v) const { return do_put(__s, __iob, __fl, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, long double __v) const { return do_put(__s, __iob, __fl, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, const void* __v) const { @@ -1324,7 +1324,7 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~num_put() {} virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, @@ -1738,7 +1738,7 @@ protected: virtual const string_type& __x() const; virtual const string_type& __X() const; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~__time_get_c_storage() {} }; @@ -1770,52 +1770,52 @@ public: typedef time_base::dateorder dateorder; typedef basic_string string_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit time_get(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY dateorder date_order() const { return this->do_date_order(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get_time(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { return do_get_time(__b, __e, __iob, __err, __tm); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get_date(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { return do_get_date(__b, __e, __iob, __err, __tm); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get_weekday(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { return do_get_weekday(__b, __e, __iob, __err, __tm); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get_monthname(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { return do_get_monthname(__b, __e, __iob, __err, __tm); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get_year(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { return do_get_year(__b, __e, __iob, __err, __tm); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm *__tm, char __fmt, char __mod = 0) const @@ -1830,7 +1830,7 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~time_get() {} virtual dateorder do_date_order() const; @@ -2399,7 +2399,7 @@ protected: explicit __time_get_storage(const char* __nm); explicit __time_get_storage(const string& __nm); - _LIBCPP_ALWAYS_INLINE ~__time_get_storage() {} + _LIBCPP_INLINE_VISIBILITY ~__time_get_storage() {} time_base::dateorder __do_date_order() const; @@ -2458,7 +2458,7 @@ class _LIBCPP_TYPE_VIS __time_put { locale_t __loc_; protected: - _LIBCPP_ALWAYS_INLINE __time_put() : __loc_(_LIBCPP_GET_C_LOCALE) {} + _LIBCPP_INLINE_VISIBILITY __time_put() : __loc_(_LIBCPP_GET_C_LOCALE) {} 
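The `<locale>` hunks are a mechanical sweep of facet members from `_LIBCPP_ALWAYS_INLINE` to `_LIBCPP_INLINE_VISIBILITY`. In this release both macros expand to essentially the same attributes; the sweep matters because only the latter is kept as libc++'s single per-function ABI knob. Approximate expansions, an assumption based on the `__config` of this era rather than a verbatim quote:

```cpp
// Assumed, simplified expansions (not quoted from __config):
#define _LIBCPP_ALWAYS_INLINE \
    __attribute__((__visibility__("hidden"), __always_inline__))
#define _LIBCPP_INLINE_VISIBILITY \
    __attribute__((__visibility__("hidden"), __always_inline__))
// The expansions coincide here; funneling everything through
// _LIBCPP_INLINE_VISIBILITY lets libc++ retarget one macro later
// (e.g. to internal_linkage) without touching every header.
```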
__time_put(const char* __nm); __time_put(const string& __nm); ~__time_put(); @@ -2477,14 +2477,14 @@ public: typedef _CharT char_type; typedef _OutputIterator iter_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit time_put(size_t __refs = 0) : locale::facet(__refs) {} iter_type put(iter_type __s, ios_base& __iob, char_type __fl, const tm* __tm, const char_type* __pb, const char_type* __pe) const; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, ios_base& __iob, char_type __fl, const tm* __tm, char __fmt, char __mod = 0) const { @@ -2494,16 +2494,16 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~time_put() {} virtual iter_type do_put(iter_type __s, ios_base&, char_type, const tm* __tm, char __fmt, char __mod) const; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit time_put(const char* __nm, size_t __refs) : locale::facet(__refs), __time_put(__nm) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit time_put(const string& __nm, size_t __refs) : locale::facet(__refs), __time_put(__nm) {} @@ -2572,16 +2572,16 @@ class _LIBCPP_TEMPLATE_VIS time_put_byname : public time_put<_CharT, _OutputIterator> { public: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit time_put_byname(const char* __nm, size_t __refs = 0) : time_put<_CharT, _OutputIterator>(__nm, __refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit time_put_byname(const string& __nm, size_t __refs = 0) : time_put<_CharT, _OutputIterator>(__nm, __refs) {} protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~time_put_byname() {} }; @@ -2596,7 +2596,7 @@ public: enum part {none, space, symbol, sign, value}; struct pattern {char field[4];}; - _LIBCPP_ALWAYS_INLINE money_base() {} + _LIBCPP_INLINE_VISIBILITY money_base() {} }; // moneypunct @@ -2610,25 +2610,25 @@ public: typedef _CharT char_type; typedef basic_string string_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit moneypunct(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE char_type decimal_point() const {return do_decimal_point();} - _LIBCPP_ALWAYS_INLINE char_type thousands_sep() const {return do_thousands_sep();} - _LIBCPP_ALWAYS_INLINE string grouping() const {return do_grouping();} - _LIBCPP_ALWAYS_INLINE string_type curr_symbol() const {return do_curr_symbol();} - _LIBCPP_ALWAYS_INLINE string_type positive_sign() const {return do_positive_sign();} - _LIBCPP_ALWAYS_INLINE string_type negative_sign() const {return do_negative_sign();} - _LIBCPP_ALWAYS_INLINE int frac_digits() const {return do_frac_digits();} - _LIBCPP_ALWAYS_INLINE pattern pos_format() const {return do_pos_format();} - _LIBCPP_ALWAYS_INLINE pattern neg_format() const {return do_neg_format();} + _LIBCPP_INLINE_VISIBILITY char_type decimal_point() const {return do_decimal_point();} + _LIBCPP_INLINE_VISIBILITY char_type thousands_sep() const {return do_thousands_sep();} + _LIBCPP_INLINE_VISIBILITY string grouping() const {return do_grouping();} + _LIBCPP_INLINE_VISIBILITY string_type curr_symbol() const {return do_curr_symbol();} + _LIBCPP_INLINE_VISIBILITY string_type positive_sign() const {return do_positive_sign();} + _LIBCPP_INLINE_VISIBILITY string_type negative_sign() const {return do_negative_sign();} + _LIBCPP_INLINE_VISIBILITY int frac_digits() const {return do_frac_digits();} + _LIBCPP_INLINE_VISIBILITY pattern pos_format() const {return do_pos_format();} + _LIBCPP_INLINE_VISIBILITY pattern neg_format() const 
{return do_neg_format();} static locale::id id; static const bool intl = _International; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~moneypunct() {} virtual char_type do_decimal_point() const {return numeric_limits::max();} @@ -2668,16 +2668,16 @@ public: typedef _CharT char_type; typedef basic_string string_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit moneypunct_byname(const char* __nm, size_t __refs = 0) : moneypunct<_CharT, _International>(__refs) {init(__nm);} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit moneypunct_byname(const string& __nm, size_t __refs = 0) : moneypunct<_CharT, _International>(__refs) {init(__nm.c_str());} protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~moneypunct_byname() {} virtual char_type do_decimal_point() const {return __decimal_point_;} @@ -2723,7 +2723,7 @@ protected: typedef _CharT char_type; typedef basic_string string_type; - _LIBCPP_ALWAYS_INLINE __money_get() {} + _LIBCPP_INLINE_VISIBILITY __money_get() {} static void __gather_info(bool __intl, const locale& __loc, money_base::pattern& __pat, char_type& __dp, @@ -2781,18 +2781,18 @@ public: typedef _InputIterator iter_type; typedef basic_string string_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit money_get(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, bool __intl, ios_base& __iob, ios_base::iostate& __err, long double& __v) const { return do_get(__b, __e, __intl, __iob, __err, __v); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type get(iter_type __b, iter_type __e, bool __intl, ios_base& __iob, ios_base::iostate& __err, string_type& __v) const { @@ -2803,7 +2803,7 @@ public: protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~money_get() {} virtual iter_type do_get(iter_type __b, iter_type __e, bool __intl, @@ -3163,7 +3163,7 @@ protected: typedef _CharT char_type; typedef basic_string string_type; - _LIBCPP_ALWAYS_INLINE __money_put() {} + _LIBCPP_INLINE_VISIBILITY __money_put() {} static void __gather_info(bool __intl, bool __neg, const locale& __loc, money_base::pattern& __pat, char_type& __dp, @@ -3339,18 +3339,18 @@ public: typedef _OutputIterator iter_type; typedef basic_string string_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit money_put(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, bool __intl, ios_base& __iob, char_type __fl, long double __units) const { return do_put(__s, __intl, __iob, __fl, __units); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY iter_type put(iter_type __s, bool __intl, ios_base& __iob, char_type __fl, const string_type& __digits) const { @@ -3360,7 +3360,7 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~money_put() {} virtual iter_type do_put(iter_type __s, bool __intl, ios_base& __iob, @@ -3489,7 +3489,7 @@ class _LIBCPP_TYPE_VIS messages_base public: typedef ptrdiff_t catalog; - _LIBCPP_ALWAYS_INLINE messages_base() {} + _LIBCPP_INLINE_VISIBILITY messages_base() {} }; template @@ -3501,24 +3501,24 @@ public: typedef _CharT char_type; typedef basic_string<_CharT> string_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit messages(size_t __refs = 0) : locale::facet(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY catalog open(const basic_string& __nm, const locale& __loc) 
const { return do_open(__nm, __loc); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY string_type get(catalog __c, int __set, int __msgid, const string_type& __dflt) const { return do_get(__c, __set, __msgid, __dflt); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void close(catalog __c) const { do_close(__c); @@ -3527,7 +3527,7 @@ public: static locale::id id; protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~messages() {} virtual catalog do_open(const basic_string&, const locale&) const; @@ -3600,16 +3600,16 @@ public: typedef messages_base::catalog catalog; typedef basic_string<_CharT> string_type; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit messages_byname(const char*, size_t __refs = 0) : messages<_CharT>(__refs) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY explicit messages_byname(const string&, size_t __refs = 0) : messages<_CharT>(__refs) {} protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY ~messages_byname() {} }; @@ -3637,43 +3637,43 @@ private: wstring_convert(const wstring_convert& __wc); wstring_convert& operator=(const wstring_convert& __wc); public: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_EXPLICIT_AFTER_CXX11 wstring_convert(_Codecvt* __pcvt = new _Codecvt); - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY wstring_convert(_Codecvt* __pcvt, state_type __state); _LIBCPP_EXPLICIT_AFTER_CXX11 wstring_convert(const byte_string& __byte_err, const wide_string& __wide_err = wide_string()); #ifndef _LIBCPP_CXX03_LANG - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY wstring_convert(wstring_convert&& __wc); #endif ~wstring_convert(); - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY wide_string from_bytes(char __byte) {return from_bytes(&__byte, &__byte+1);} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY wide_string from_bytes(const char* __ptr) {return from_bytes(__ptr, __ptr + char_traits::length(__ptr));} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY wide_string from_bytes(const byte_string& __str) {return from_bytes(__str.data(), __str.data() + __str.size());} wide_string from_bytes(const char* __first, const char* __last); - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY byte_string to_bytes(_Elem __wchar) {return to_bytes(&__wchar, &__wchar+1);} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY byte_string to_bytes(const _Elem* __wptr) {return to_bytes(__wptr, __wptr + char_traits<_Elem>::length(__wptr));} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY byte_string to_bytes(const wide_string& __wstr) {return to_bytes(__wstr.data(), __wstr.data() + __wstr.size());} byte_string to_bytes(const _Elem* __first, const _Elem* __last); - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY size_t converted() const _NOEXCEPT {return __cvtcount_;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY state_type state() const {return __cvtstate_;} }; diff --git a/include/c++/v1/map b/include/c++/v1/map index 8a72260..cfd8280 100644 --- a/include/c++/v1/map +++ b/include/c++/v1/map @@ -40,6 +40,8 @@ public: typedef implementation-defined const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; + typedef unspecified node_type; // C++17 + typedef INSERT_RETURN_TYPE insert_return_type; // C++17 class value_compare : public binary_function @@ -137,6 +139,11 @@ public: void insert(InputIterator first, InputIterator last); void insert(initializer_list il); + node_type extract(const_iterator position); // C++17 + node_type 
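`<map>` gains the C++17 node-handle API (`extract` and `insert(node_type&&)`), which moves nodes between containers, and even rewrites keys, without copying elements or allocating. A small sketch:

```cpp
#include <map>
#include <string>
#include <utility>

int main() {
    std::map<int, std::string> src{{1, "one"}, {2, "two"}}, dst;
    auto nh = src.extract(1);     // unlinks the node; no copy, no allocation
    nh.key() = 10;                // the key is mutable through the handle
    dst.insert(std::move(nh));    // dst now owns {10, "one"}
    return static_cast<int>(src.size() + dst.size());  // 1 + 1
}
```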
extract(const key_type& x); // C++17 + insert_return_type insert(node_type&& nh); // C++17 + iterator insert(const_iterator hint, node_type&& nh); // C++17 + template pair try_emplace(const key_type& k, Args&&... args); // C++17 template @@ -260,6 +267,7 @@ public: typedef implementation-defined const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; + typedef unspecified node_type; // C++17 class value_compare : public binary_function @@ -349,6 +357,11 @@ public: void insert(InputIterator first, InputIterator last); void insert(initializer_list il); + node_type extract(const_iterator position); // C++17 + node_type extract(const key_type& x); // C++17 + iterator insert(node_type&& nh); // C++17 + iterator insert(const_iterator hint, node_type&& nh); // C++17 + iterator erase(const_iterator position); iterator erase(iterator position); // C++14 size_type erase(const key_type& k); @@ -440,12 +453,14 @@ swap(multimap& x, #include <__config> #include <__tree> +#include <__node_handle> #include #include #include #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -470,13 +485,13 @@ public: const _Compare& key_comp() const _NOEXCEPT {return *this;} _LIBCPP_INLINE_VISIBILITY bool operator()(const _CP& __x, const _CP& __y) const - {return static_cast(*this)(__x.__cc.first, __y.__cc.first);} + {return static_cast(*this)(__x.__get_value().first, __y.__get_value().first);} _LIBCPP_INLINE_VISIBILITY bool operator()(const _CP& __x, const _Key& __y) const - {return static_cast(*this)(__x.__cc.first, __y);} + {return static_cast(*this)(__x.__get_value().first, __y);} _LIBCPP_INLINE_VISIBILITY bool operator()(const _Key& __x, const _CP& __y) const - {return static_cast(*this)(__x, __y.__cc.first);} + {return static_cast(*this)(__x, __y.__get_value().first);} void swap(__map_value_compare&__y) _NOEXCEPT_(__is_nothrow_swappable<_Compare>::value) { @@ -489,13 +504,13 @@ public: _LIBCPP_INLINE_VISIBILITY typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type operator () ( const _K2& __x, const _CP& __y ) const - {return static_cast(*this) (__x, __y.__cc.first);} + {return static_cast(*this) (__x, __y.__get_value().first);} template _LIBCPP_INLINE_VISIBILITY typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type operator () (const _CP& __x, const _K2& __y) const - {return static_cast(*this) (__x.__cc.first, __y);} + {return static_cast(*this) (__x.__get_value().first, __y);} #endif }; @@ -518,13 +533,13 @@ public: _LIBCPP_INLINE_VISIBILITY bool operator()(const _CP& __x, const _CP& __y) const - {return comp(__x.__cc.first, __y.__cc.first);} + {return comp(__x.__get_value().first, __y.__get_value().first);} _LIBCPP_INLINE_VISIBILITY bool operator()(const _CP& __x, const _Key& __y) const - {return comp(__x.__cc.first, __y);} + {return comp(__x.__get_value().first, __y);} _LIBCPP_INLINE_VISIBILITY bool operator()(const _Key& __x, const _CP& __y) const - {return comp(__x, __y.__cc.first);} + {return comp(__x, __y.__get_value().first);} void swap(__map_value_compare&__y) _NOEXCEPT_(__is_nothrow_swappable<_Compare>::value) { @@ -537,13 +552,13 @@ public: _LIBCPP_INLINE_VISIBILITY typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type operator () ( const _K2& __x, const _CP& __y ) const - {return comp (__x, __y.__cc.first);} + {return comp (__x, __y.__get_value().first);} template _LIBCPP_INLINE_VISIBILITY typename 
enable_if<__is_transparent<_Compare, _K2>::value, bool>::type operator () (const _CP& __x, const _K2& __y) const - {return comp (__x.__cc.first, __y);} + {return comp (__x.__get_value().first, __y);} #endif }; @@ -597,9 +612,9 @@ public: void operator()(pointer __p) _NOEXCEPT { if (__second_constructed) - __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__cc.second)); + __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__get_value().second)); if (__first_constructed) - __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__cc.first)); + __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__get_value().first)); if (__p) __alloc_traits::deallocate(__na_, __p, 1); } @@ -614,23 +629,67 @@ template class __map_const_iterator; #ifndef _LIBCPP_CXX03_LANG template -union __value_type +struct __value_type { typedef _Key key_type; typedef _Tp mapped_type; typedef pair value_type; - typedef pair __nc_value_type; + typedef pair __nc_ref_pair_type; + typedef pair __nc_rref_pair_type; +private: value_type __cc; - __nc_value_type __nc; + +public: + _LIBCPP_INLINE_VISIBILITY + value_type& __get_value() + { +#if _LIBCPP_STD_VER > 14 + return *_VSTD::launder(_VSTD::addressof(__cc)); +#else + return __cc; +#endif + } + + _LIBCPP_INLINE_VISIBILITY + const value_type& __get_value() const + { +#if _LIBCPP_STD_VER > 14 + return *_VSTD::launder(_VSTD::addressof(__cc)); +#else + return __cc; +#endif + } + + _LIBCPP_INLINE_VISIBILITY + __nc_ref_pair_type __ref() + { + value_type& __v = __get_value(); + return __nc_ref_pair_type(const_cast(__v.first), __v.second); + } + + _LIBCPP_INLINE_VISIBILITY + __nc_rref_pair_type __move() + { + value_type& __v = __get_value(); + return __nc_rref_pair_type( + _VSTD::move(const_cast(__v.first)), + _VSTD::move(__v.second)); + } _LIBCPP_INLINE_VISIBILITY __value_type& operator=(const __value_type& __v) - {__nc = __v.__cc; return *this;} + { + __ref() = __v.__get_value(); + return *this; + } _LIBCPP_INLINE_VISIBILITY __value_type& operator=(__value_type&& __v) - {__nc = _VSTD::move(__v.__nc); return *this;} + { + __ref() = __v.__move(); + return *this; + } template ::type > _LIBCPP_INLINE_VISIBILITY - __value_type& operator=(_ValueTp&& __v) { - __nc = _VSTD::forward<_ValueTp>(__v); return *this; + __value_type& operator=(_ValueTp&& __v) + { + __ref() = _VSTD::forward<_ValueTp>(__v); + return *this; } private: @@ -658,8 +719,15 @@ struct __value_type typedef _Tp mapped_type; typedef pair value_type; +private: value_type __cc; +public: + _LIBCPP_INLINE_VISIBILITY + value_type& __get_value() { return __cc; } + _LIBCPP_INLINE_VISIBILITY + const value_type& __get_value() const { return __cc; } + private: __value_type(); __value_type(__value_type const&); @@ -701,9 +769,9 @@ public: __map_iterator(_TreeIterator __i) _NOEXCEPT : __i_(__i) {} _LIBCPP_INLINE_VISIBILITY - reference operator*() const {return __i_->__cc;} + reference operator*() const {return __i_->__get_value();} _LIBCPP_INLINE_VISIBILITY - pointer operator->() const {return pointer_traits::pointer_to(__i_->__cc);} + pointer operator->() const {return pointer_traits::pointer_to(__i_->__get_value());} _LIBCPP_INLINE_VISIBILITY __map_iterator& operator++() {++__i_; return *this;} @@ -764,9 +832,9 @@ public: : __i_(__i.__i_) {} _LIBCPP_INLINE_VISIBILITY - reference operator*() const {return __i_->__cc;} + reference operator*() const {return __i_->__get_value();} _LIBCPP_INLINE_VISIBILITY - pointer operator->() const {return pointer_traits::pointer_to(__i_->__cc);} + pointer 
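Here `__value_type` changes from a union punning `__cc`/`__nc` to a struct whose `pair<const Key, T>` is reached only through `__get_value()`; in C++17 mode that accessor launders the address, because the container assigns through `const_cast`'d references (`__ref()`/`__move()`) to the `const` first member. A standalone sketch of the same idiom; the names and layout are ours, not libc++'s:

```cpp
#include <memory>    // std::addressof
#include <new>       // std::launder (C++17)
#include <utility>

// Illustrative only: a node slot that hands readers pair<const K, V>
// while giving the container assignable references.
template <class K, class V>
struct Slot {
    std::pair<const K, V> cc;    // what map iterators expose

    std::pair<const K, V>& get() {
        // The container writes through const_cast'd references (see ref()),
        // so C++17 readers must re-obtain the object via std::launder.
        return *std::launder(std::addressof(cc));
    }
    std::pair<K&, V&> ref() {
        auto& v = get();
        return {const_cast<K&>(v.first), v.second};
    }
};

int main() {
    Slot<int, int> s{{1, 2}};
    return s.get().second;       // 2
}
```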
operator->() const {return pointer_traits::pointer_to(__i_->__get_value());} _LIBCPP_INLINE_VISIBILITY __map_const_iterator& operator++() {++__i_; return *this;} @@ -809,7 +877,6 @@ public: typedef _Key key_type; typedef _Tp mapped_type; typedef pair value_type; - typedef pair __nc_value_type; typedef _Compare key_compare; typedef _Allocator allocator_type; typedef value_type& reference; @@ -854,6 +921,11 @@ public: typedef _VSTD::reverse_iterator reverse_iterator; typedef _VSTD::reverse_iterator const_reverse_iterator; +#if _LIBCPP_STD_VER > 14 + typedef __map_node_handle node_type; + typedef __insert_return_type insert_return_type; +#endif + _LIBCPP_INLINE_VISIBILITY map() _NOEXCEPT_( @@ -1201,6 +1273,35 @@ public: _LIBCPP_INLINE_VISIBILITY void clear() _NOEXCEPT {__tree_.clear();} +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY + insert_return_type insert(node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to map::insert()"); + return __tree_.template __node_handle_insert_unique< + node_type, insert_return_type>(_VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + iterator insert(const_iterator __hint, node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to map::insert()"); + return __tree_.template __node_handle_insert_unique( + __hint.__i_, _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(key_type const& __key) + { + return __tree_.template __node_handle_extract(__key); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(const_iterator __it) + { + return __tree_.template __node_handle_extract(__it.__i_); + } +#endif + _LIBCPP_INLINE_VISIBILITY void swap(map& __m) _NOEXCEPT_(__is_nothrow_swappable<__base>::value) @@ -1308,7 +1409,7 @@ map<_Key, _Tp, _Compare, _Allocator>::map(map&& __m, const allocator_type& __a) const_iterator __e = cend(); while (!__m.empty()) __tree_.__insert_unique(__e.__i_, - _VSTD::move(__m.__tree_.remove(__m.begin().__i_)->__value_.__nc)); + __m.__tree_.remove(__m.begin().__i_)->__value_.__move()); } } @@ -1319,7 +1420,7 @@ map<_Key, _Tp, _Compare, _Allocator>::operator[](const key_type& __k) return __tree_.__emplace_unique_key_args(__k, _VSTD::piecewise_construct, _VSTD::forward_as_tuple(__k), - _VSTD::forward_as_tuple()).first->__cc.second; + _VSTD::forward_as_tuple()).first->__get_value().second; } template @@ -1329,7 +1430,7 @@ map<_Key, _Tp, _Compare, _Allocator>::operator[](key_type&& __k) return __tree_.__emplace_unique_key_args(__k, _VSTD::piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__k)), - _VSTD::forward_as_tuple()).first->__cc.second; + _VSTD::forward_as_tuple()).first->__get_value().second; } #else // _LIBCPP_CXX03_LANG @@ -1340,9 +1441,9 @@ map<_Key, _Tp, _Compare, _Allocator>::__construct_node_with_key(const key_type& { __node_allocator& __na = __tree_.__node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__cc.first), __k); + __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__get_value().first), __k); __h.get_deleter().__first_constructed = true; - __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__cc.second)); + __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__get_value().second)); __h.get_deleter().__second_constructed = true; return _LIBCPP_EXPLICIT_MOVE(__h); // explicitly moved for C++03 } @@ 
-1360,7 +1461,7 @@ map<_Key, _Tp, _Compare, _Allocator>::operator[](const key_type& __k) __tree_.__insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); __r = __h.release(); } - return __r->__value_.__cc.second; + return __r->__value_.__get_value().second; } #endif // _LIBCPP_CXX03_LANG @@ -1375,7 +1476,7 @@ map<_Key, _Tp, _Compare, _Allocator>::at(const key_type& __k) if (__child == nullptr) throw out_of_range("map::at: key not found"); #endif // _LIBCPP_NO_EXCEPTIONS - return static_cast<__node_pointer>(__child)->__value_.__cc.second; + return static_cast<__node_pointer>(__child)->__value_.__get_value().second; } template @@ -1388,7 +1489,7 @@ map<_Key, _Tp, _Compare, _Allocator>::at(const key_type& __k) const if (__child == nullptr) throw out_of_range("map::at: key not found"); #endif // _LIBCPP_NO_EXCEPTIONS - return static_cast<__node_pointer>(__child)->__value_.__cc.second; + return static_cast<__node_pointer>(__child)->__value_.__get_value().second; } @@ -1465,7 +1566,6 @@ public: typedef _Key key_type; typedef _Tp mapped_type; typedef pair value_type; - typedef pair __nc_value_type; typedef _Compare key_compare; typedef _Allocator allocator_type; typedef value_type& reference; @@ -1511,6 +1611,10 @@ public: typedef _VSTD::reverse_iterator reverse_iterator; typedef _VSTD::reverse_iterator const_reverse_iterator; +#if _LIBCPP_STD_VER > 14 + typedef __map_node_handle node_type; +#endif + _LIBCPP_INLINE_VISIBILITY multimap() _NOEXCEPT_( @@ -1749,8 +1853,39 @@ public: _LIBCPP_INLINE_VISIBILITY iterator erase(const_iterator __f, const_iterator __l) {return __tree_.erase(__f.__i_, __l.__i_);} + +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY + iterator insert(node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to multimap::insert()"); + return __tree_.template __node_handle_insert_multi( + _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + iterator insert(const_iterator __hint, node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to multimap::insert()"); + return __tree_.template __node_handle_insert_multi( + __hint.__i_, _VSTD::move(__nh)); + } _LIBCPP_INLINE_VISIBILITY - void clear() {__tree_.clear();} + node_type extract(key_type const& __key) + { + return __tree_.template __node_handle_extract(__key); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(const_iterator __it) + { + return __tree_.template __node_handle_extract( + __it.__i_); + } +#endif + + _LIBCPP_INLINE_VISIBILITY + void clear() _NOEXCEPT {__tree_.clear();} _LIBCPP_INLINE_VISIBILITY void swap(multimap& __m) @@ -1852,7 +1987,7 @@ multimap<_Key, _Tp, _Compare, _Allocator>::multimap(multimap&& __m, const alloca const_iterator __e = cend(); while (!__m.empty()) __tree_.__insert_multi(__e.__i_, - _VSTD::move(__m.__tree_.remove(__m.begin().__i_)->__value_.__nc)); + _VSTD::move(__m.__tree_.remove(__m.begin().__i_)->__value_.__move())); } } #endif diff --git a/include/c++/v1/math.h b/include/c++/v1/math.h index cd055ca..3cc72aa 100644 --- a/include/c++/v1/math.h +++ b/include/c++/v1/math.h @@ -314,7 +314,7 @@ extern "C++" { #ifdef signbit template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_signbit(_A1 __lcpp_x) _NOEXCEPT { @@ -376,7 +376,7 @@ signbit(_A1) _NOEXCEPT #ifdef fpclassify template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY int __libcpp_fpclassify(_A1 __lcpp_x) _NOEXCEPT { @@ 
-422,7 +422,7 @@ fpclassify(_A1 __lcpp_x) _NOEXCEPT #ifdef isfinite template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_isfinite(_A1 __lcpp_x) _NOEXCEPT { @@ -456,7 +456,7 @@ isfinite(_A1) _NOEXCEPT #ifdef isinf template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_isinf(_A1 __lcpp_x) _NOEXCEPT { @@ -483,6 +483,20 @@ typename std::enable_if< isinf(_A1) _NOEXCEPT { return false; } +#ifdef _LIBCPP_PREFERRED_OVERLOAD +inline _LIBCPP_INLINE_VISIBILITY +bool +isinf(float __lcpp_x) _NOEXCEPT { return __libcpp_isinf(__lcpp_x); } + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_PREFERRED_OVERLOAD +bool +isinf(double __lcpp_x) _NOEXCEPT { return __libcpp_isinf(__lcpp_x); } + +inline _LIBCPP_INLINE_VISIBILITY +bool +isinf(long double __lcpp_x) _NOEXCEPT { return __libcpp_isinf(__lcpp_x); } +#endif + #endif // isinf // isnan @@ -490,7 +504,7 @@ isinf(_A1) _NOEXCEPT #ifdef isnan template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_isnan(_A1 __lcpp_x) _NOEXCEPT { @@ -513,6 +527,20 @@ typename std::enable_if::value, bool>::type isnan(_A1) _NOEXCEPT { return false; } +#ifdef _LIBCPP_PREFERRED_OVERLOAD +inline _LIBCPP_INLINE_VISIBILITY +bool +isnan(float __lcpp_x) _NOEXCEPT { return __libcpp_isnan(__lcpp_x); } + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_PREFERRED_OVERLOAD +bool +isnan(double __lcpp_x) _NOEXCEPT { return __libcpp_isnan(__lcpp_x); } + +inline _LIBCPP_INLINE_VISIBILITY +bool +isnan(long double __lcpp_x) _NOEXCEPT { return __libcpp_isnan(__lcpp_x); } +#endif + #endif // isnan // isnormal @@ -520,7 +548,7 @@ isnan(_A1) _NOEXCEPT #ifdef isnormal template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_isnormal(_A1 __lcpp_x) _NOEXCEPT { @@ -550,7 +578,7 @@ isnormal(_A1 __lcpp_x) _NOEXCEPT #ifdef isgreater template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_isgreater(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT { @@ -580,7 +608,7 @@ isgreater(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT #ifdef isgreaterequal template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_isgreaterequal(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT { @@ -610,7 +638,7 @@ isgreaterequal(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT #ifdef isless template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_isless(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT { @@ -640,7 +668,7 @@ isless(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT #ifdef islessequal template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_islessequal(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT { @@ -670,7 +698,7 @@ islessequal(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT #ifdef islessgreater template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_islessgreater(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT { @@ -700,7 +728,7 @@ islessgreater(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT #ifdef isunordered template -_LIBCPP_ALWAYS_INLINE +_LIBCPP_INLINE_VISIBILITY bool __libcpp_isunordered(_A1 __lcpp_x, _A2 __lcpp_y) _NOEXCEPT { diff --git a/include/c++/v1/memory b/include/c++/v1/memory index 021bd13..6e292a5 100644 --- a/include/c++/v1/memory +++ b/include/c++/v1/memory @@ -212,10 +212,10 @@ template template ForwardIterator uninitialized_default_construct_n(ForwardIterator first, Size n); -template struct auto_ptr_ref {}; // removed in C++17 +template struct auto_ptr_ref {}; // deprecated in C++11, removed in C++17 template -class auto_ptr // removed in C++17 +class auto_ptr // deprecated in C++11, removed in C++17 { public: typedef X element_type; @@ -667,6 +667,7 @@ void* 
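The `<math.h>` hunk adds `_LIBCPP_PREFERRED_OVERLOAD` copies of `isinf`/`isnan` for `float`, `double`, and `long double`, so that on platforms whose libc declares, say, `int isinf(double)` as a real function rather than a macro, C++ callers still get the standard `bool`-returning overloads. A sketch, assuming such a platform:

```cpp
#include <math.h>

int main() {
    // Both calls resolve to the bool-returning overloads added here.
    bool a = isinf(HUGE_VAL);   // double argument
    bool b = isinf(0.5f);       // float argument
    return (a && !b) ? 0 : 1;
}
```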
align(size_t alignment, size_t size, void*& ptr, size_t& space); #if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) # include #endif +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -679,7 +680,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY _ValueType __libcpp_relaxed_load(_ValueType const* __value) { #if !defined(_LIBCPP_HAS_NO_THREADS) && \ defined(__ATOMIC_RELAXED) && \ @@ -691,7 +692,7 @@ _ValueType __libcpp_relaxed_load(_ValueType const* __value) { } template -inline _LIBCPP_ALWAYS_INLINE +inline _LIBCPP_INLINE_VISIBILITY _ValueType __libcpp_acquire_load(_ValueType const* __value) { #if !defined(_LIBCPP_HAS_NO_THREADS) && \ defined(__ATOMIC_ACQUIRE) && \ @@ -1782,7 +1783,7 @@ public: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 allocator() _NOEXCEPT {} - template + template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 allocator(const allocator<_Up>&) _NOEXCEPT {} @@ -1790,7 +1791,7 @@ public: {return _VSTD::addressof(__x);} _LIBCPP_INLINE_VISIBILITY const_pointer address(const_reference __x) const _NOEXCEPT {return _VSTD::addressof(__x);} - _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY + _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY pointer allocate(size_type __n, allocator::const_pointer = 0) { if (__n > max_size()) @@ -1887,7 +1888,7 @@ public: allocator() _NOEXCEPT {} template - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 allocator(const allocator<_Up>&) _NOEXCEPT {} _LIBCPP_INLINE_VISIBILITY const_pointer address(const_reference __x) const _NOEXCEPT @@ -1989,10 +1990,10 @@ public: _LIBCPP_INLINE_VISIBILITY explicit raw_storage_iterator(_OutputIterator __x) : __x_(__x) {} _LIBCPP_INLINE_VISIBILITY raw_storage_iterator& operator*() {return *this;} _LIBCPP_INLINE_VISIBILITY raw_storage_iterator& operator=(const _Tp& __element) - {::new(&*__x_) _Tp(__element); return *this;} + {::new(_VSTD::addressof(*__x_)) _Tp(__element); return *this;} #if _LIBCPP_STD_VER >= 14 _LIBCPP_INLINE_VISIBILITY raw_storage_iterator& operator=(_Tp&& __element) - {::new(&*__x_) _Tp(_VSTD::move(__element)); return *this;} + {::new(_VSTD::addressof(*__x_)) _Tp(_VSTD::move(__element)); return *this;} #endif _LIBCPP_INLINE_VISIBILITY raw_storage_iterator& operator++() {++__x_; return *this;} _LIBCPP_INLINE_VISIBILITY raw_storage_iterator operator++(int) @@ -2003,7 +2004,7 @@ public: }; template -_LIBCPP_NO_CFI +_LIBCPP_NODISCARD_EXT _LIBCPP_NO_CFI pair<_Tp*, ptrdiff_t> get_temporary_buffer(ptrdiff_t __n) _NOEXCEPT { @@ -2056,13 +2057,13 @@ void return_temporary_buffer(_Tp* __p) _NOEXCEPT #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) template -struct auto_ptr_ref +struct _LIBCPP_DEPRECATED_IN_CXX11 auto_ptr_ref { _Tp* __ptr_; }; template -class _LIBCPP_TEMPLATE_VIS auto_ptr +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 auto_ptr { private: _Tp* __ptr_; @@ -2106,7 +2107,7 @@ public: }; template <> -class _LIBCPP_TEMPLATE_VIS auto_ptr +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 auto_ptr { public: typedef void element_type; @@ -2130,7 +2131,9 @@ struct __compressed_pair_elem { _LIBCPP_INLINE_VISIBILITY constexpr explicit __compressed_pair_elem(_Up&& __u) - : __value_(_VSTD::forward<_Up>(__u)){}; + : __value_(_VSTD::forward<_Up>(__u)) + { + } template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 @@ -2167,7 +2170,8 @@ struct 
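In `<memory>`, `raw_storage_iterator`'s placement-new now goes through `_VSTD::addressof(*__x_)` instead of `&*__x_`, which matters for element types that overload (or delete) unary `operator&`; the same stretch tags `auto_ptr` with `_LIBCPP_DEPRECATED_IN_CXX11`. A minimal illustration of why `addressof` is the safe spelling:

```cpp
#include <memory>

struct Fancy {
    int v = 0;
    void operator&() const = delete;   // element types may hijack unary '&'
};

int main() {
    Fancy f;
    // Fancy* p = &f;                  // ill-formed: deleted operator&
    Fancy* p = std::addressof(f);      // always yields the real address
    return p->v;
}
```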
__compressed_pair_elem<_Tp, _Idx, true> : private _Tp { _LIBCPP_INLINE_VISIBILITY constexpr explicit __compressed_pair_elem(_Up&& __u) - : __value_type(_VSTD::forward<_Up>(__u)){}; + : __value_type(_VSTD::forward<_Up>(__u)) + {} template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 @@ -3483,7 +3487,7 @@ public: virtual const char* what() const _NOEXCEPT; }; -_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY void __throw_bad_weak_ptr() { #ifndef _LIBCPP_NO_EXCEPTIONS @@ -3511,7 +3515,7 @@ public: explicit __shared_count(long __refs = 0) _NOEXCEPT : __shared_owners_(__refs) {} -#if defined(_LIBCPP_BUILDING_MEMORY) && \ +#if defined(_LIBCPP_BUILDING_LIBRARY) && \ defined(_LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS) void __add_shared() _NOEXCEPT; bool __release_shared() _NOEXCEPT; @@ -3549,7 +3553,7 @@ protected: virtual ~__shared_weak_count(); public: -#if defined(_LIBCPP_BUILDING_MEMORY) && \ +#if defined(_LIBCPP_BUILDING_LIBRARY) && \ defined(_LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS) void __add_shared() _NOEXCEPT; void __add_weak() _NOEXCEPT; @@ -3682,7 +3686,7 @@ private: virtual void __on_zero_shared_weak() _NOEXCEPT; public: _LIBCPP_INLINE_VISIBILITY - _Tp* get() _NOEXCEPT {return &__data_.second();} + _Tp* get() _NOEXCEPT {return _VSTD::addressof(__data_.second());} }; template @@ -5549,7 +5553,7 @@ struct _LIBCPP_TYPE_VIS pointer_safety #endif #if !defined(_LIBCPP_ABI_POINTER_SAFETY_ENUM_TYPE) && \ - defined(_LIBCPP_BUILDING_MEMORY) + defined(_LIBCPP_BUILDING_LIBRARY) _LIBCPP_FUNC_VIS pointer_safety get_pointer_safety() _NOEXCEPT; #else // This function is only offered in C++03 under ABI v1. @@ -5631,22 +5635,25 @@ struct __temp_value { _Tp & get() { return *__addr(); } template - __temp_value(_Alloc &__alloc, _Args&& ... __args) : __a(__alloc) - { _Traits::construct(__a, __addr(), _VSTD::forward<_Args>(__args)...); } + _LIBCPP_NO_CFI + __temp_value(_Alloc &__alloc, _Args&& ... __args) : __a(__alloc) { + _Traits::construct(__a, reinterpret_cast<_Tp*>(addressof(__v)), + _VSTD::forward<_Args>(__args)...); + } ~__temp_value() { _Traits::destroy(__a, __addr()); } }; #endif -#if _LIBCPP_STD_VER > 14 -template +template struct __is_allocator : false_type {}; template struct __is_allocator<_Alloc, - void_t().allocate(size_t{}))>> + typename __void_t::type, + typename __void_t().allocate(size_t(0)))>::type + > : true_type {}; -#endif _LIBCPP_END_NAMESPACE_STD diff --git a/include/c++/v1/module.modulemap b/include/c++/v1/module.modulemap index 4739386..681c80b 100644 --- a/include/c++/v1/module.modulemap +++ b/include/c++/v1/module.modulemap @@ -228,6 +228,10 @@ module std [system] { header "atomic" export * } + module bit { + header "bit" + export * + } module bitset { header "bitset" export string @@ -235,6 +239,10 @@ module std [system] { export * } // No submodule for cassert. It fundamentally needs repeated, textual inclusion. + module charconv { + header "charconv" + export * + } module chrono { header "chrono" export * @@ -264,6 +272,10 @@ module std [system] { header "exception" export * } + module filesystem { + header "filesystem" + export * + } module forward_list { header "forward_list" export initializer_list @@ -482,6 +494,7 @@ module std [system] { // FIXME: These should be private. 
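`__is_allocator` is rewritten with the C++03-compatible `__void_t` spelling and is no longer gated on C++17, since the deduction guides added throughout this diff consult it in all dialects. A self-contained sketch of the same detection idiom, using our own names and `std::void_t` for brevity:

```cpp
#include <cstddef>
#include <memory>
#include <type_traits>

// An "allocator" must expose value_type and a callable allocate(size_t).
template <class A, class = void, class = void>
struct is_allocator_like : std::false_type {};

template <class A>
struct is_allocator_like<A,
    std::void_t<typename A::value_type>,
    std::void_t<decltype(std::declval<A&>().allocate(std::size_t(0)))>>
    : std::true_type {};

static_assert(is_allocator_like<std::allocator<int>>::value, "");
static_assert(!is_allocator_like<int>::value, "");
```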
module __bit_reference { header "__bit_reference" export * } module __debug { header "__debug" export * } + module __errc { header "__errc" export * } module __functional_base { header "__functional_base" export * } module __hash_table { header "__hash_table" export * } module __locale { header "__locale" export * } @@ -493,6 +506,7 @@ module std [system] { module __tree { header "__tree" export * } module __tuple { header "__tuple" export * } module __undef_macros { header "__undef_macros" export * } + module __node_handle { header "__node_handle" export * } module experimental { requires cplusplus11 @@ -550,10 +564,18 @@ module std [system] { header "experimental/regex" export * } + module simd { + header "experimental/simd" + export * + } module set { header "experimental/set" export * } + module span { + header "span" + export * + } module string { header "experimental/string" export * diff --git a/include/c++/v1/mutex b/include/c++/v1/mutex index 52e39b0..9c55c7c 100644 --- a/include/c++/v1/mutex +++ b/include/c++/v1/mutex @@ -194,6 +194,7 @@ template #ifndef _LIBCPP_CXX03_LANG #include #endif +#include #include <__threading_support> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/include/c++/v1/new b/include/c++/v1/new index e42ffad..7c3076c 100644 --- a/include/c++/v1/new +++ b/include/c++/v1/new @@ -91,6 +91,7 @@ void operator delete[](void* ptr, void*) noexcept; #include #include #include +#include #ifdef _LIBCPP_NO_EXCEPTIONS #include #endif @@ -103,18 +104,11 @@ void operator delete[](void* ptr, void*) noexcept; #pragma GCC system_header #endif -#if !(defined(_LIBCPP_BUILDING_NEW) || _LIBCPP_STD_VER >= 14 || \ +#if !(defined(_LIBCPP_BUILDING_LIBRARY) || _LIBCPP_STD_VER >= 14 || \ (defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309)) # define _LIBCPP_HAS_NO_SIZED_DEALLOCATION #endif -#if !defined(_LIBCPP_HAS_NO_ALIGNED_ALLOCATION) && \ - (!(defined(_LIBCPP_BUILDING_NEW) || _LIBCPP_STD_VER > 14 || \ - (defined(__cpp_aligned_new) && __cpp_aligned_new >= 201606))) -# define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION -#endif - - #if !__has_builtin(__builtin_operator_new) || \ __has_builtin(__builtin_operator_new) < 201802L || \ defined(_LIBCPP_HAS_NO_ALIGNED_ALLOCATION) || \ @@ -167,7 +161,7 @@ public: #define _LIBCPP_BAD_ARRAY_LENGTH_DEFINED -#endif // defined(_LIBCPP_BUILDING_NEW) || (_LIBCPP_STD_VER > 11) +#endif // defined(_LIBCPP_BUILDING_LIBRARY) || (_LIBCPP_STD_VER > 11) #if !defined(_LIBCPP_ABI_MICROSOFT) || defined(_LIBCPP_NO_VCRUNTIME) #if !defined(_LIBCPP_HAS_NO_ALIGNED_ALLOCATION) || _LIBCPP_STD_VER > 14 @@ -177,6 +171,10 @@ enum class _LIBCPP_ENUM_VIS align_val_t : size_t { }; enum align_val_t { __zero = 0, __max = (size_t)-1 }; #endif #endif +#elif !defined(__cpp_aligned_new) +// We're defering to Microsoft's STL to provide aligned new et al. We don't +// have it unless the language feature test macro is defined. 
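In `<new>`, the `_LIBCPP_BUILDING_NEW` guard becomes the generic `_LIBCPP_BUILDING_LIBRARY`, and when libc++ sits on top of Microsoft's STL it now defers aligned allocation to that library unless `__cpp_aligned_new` is advertised. A hedged sketch of the feature being gated, assuming a C++17 compiler:

```cpp
#include <new>

struct alignas(64) Packet { unsigned char bytes[64]; };

int main() {
#if defined(__cpp_aligned_new)
    // C++17 aligned allocation: reaches operator new(size_t, align_val_t).
    Packet* p = new Packet;
    delete p;
#endif
    // Otherwise (i.e. with _LIBCPP_HAS_NO_ALIGNED_ALLOCATION in effect),
    // an over-aligned new falls back to plain operator new(size_t).
    return 0;
}
```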
+#define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION #endif } // std @@ -281,7 +279,7 @@ inline _LIBCPP_INLINE_VISIBILITY void __libcpp_deallocate(void* __ptr, size_t __ } #ifdef _LIBCPP_BAD_ARRAY_LENGTH_DEFINED -_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY #ifndef _LIBCPP_NO_EXCEPTIONS _LIBCPP_AVAILABILITY_BAD_ARRAY_LENGTH #endif @@ -290,13 +288,13 @@ void __throw_bad_array_length() #ifndef _LIBCPP_NO_EXCEPTIONS throw bad_array_length(); #else - _VSTD::abort(); + _VSTD::abort(); #endif } #endif template -_LIBCPP_NODISCARD_AFTER_CXX17 inline +_LIBCPP_NODISCARD_AFTER_CXX17 inline _LIBCPP_CONSTEXPR _Tp* __launder(_Tp* __p) _NOEXCEPT { static_assert (!(is_function<_Tp>::value), "can't launder functions" ); diff --git a/include/c++/v1/numeric b/include/c++/v1/numeric index b33b6a3..4e68239 100644 --- a/include/c++/v1/numeric +++ b/include/c++/v1/numeric @@ -104,15 +104,15 @@ template - OutputIterator - transform_inclusive_scan(InputIterator first, InputIterator last, + OutputIterator + transform_inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op, UnaryOperation unary_op); // C++17 template - OutputIterator - transform_inclusive_scan(InputIterator first, InputIterator last, + OutputIterator + transform_inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op, UnaryOperation unary_op, T init); // C++17 @@ -142,6 +142,7 @@ template #include #include // for numeric_limits #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/include/c++/v1/optional b/include/c++/v1/optional index 1f8e491..9ef0aac 100644 --- a/include/c++/v1/optional +++ b/include/c++/v1/optional @@ -139,6 +139,10 @@ namespace std { private: T *val; // exposition only }; + +template + optional(T) -> optional; + } // namespace std */ @@ -152,6 +156,7 @@ namespace std { #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -1003,6 +1008,11 @@ private: } }; +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template + optional(T) -> optional; +#endif + // Comparisons between optionals template _LIBCPP_INLINE_VISIBILITY constexpr diff --git a/include/c++/v1/ostream b/include/c++/v1/ostream index 197636e..5404e0d 100644 --- a/include/c++/v1/ostream +++ b/include/c++/v1/ostream @@ -232,7 +232,7 @@ public: basic_ostream& seekp(off_type __off, ios_base::seekdir __dir); protected: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY basic_ostream() {} // extension, intentially does not initialize }; @@ -249,7 +249,7 @@ public: explicit sentry(basic_ostream<_CharT, _Traits>& __os); ~sentry(); - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_EXPLICIT operator bool() const {return __ok_;} }; diff --git a/include/c++/v1/queue b/include/c++/v1/queue index 80546fd..4677e52 100644 --- a/include/c++/v1/queue +++ b/include/c++/v1/queue @@ -69,6 +69,12 @@ public: void swap(queue& q) noexcept(is_nothrow_swappable_v) }; +template + queue(Container) -> queue; // C++17 + +template + queue(Container, Allocator) -> queue; // C++17 + template bool operator==(const queue& x,const queue& y); @@ -157,6 +163,20 @@ public: is_nothrow_swappable_v) }; +template +priority_queue(Compare, Container) + -> priority_queue; // C++17 + +template::value_type>, + class Container = vector::value_type>> +priority_queue(InputIterator, InputIterator, Compare = Compare(), Container = Container()) + -> 
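This stretch adds C++17 deduction guides for `optional`, `queue`, and `priority_queue`; the container-adaptor guides are constrained on `__is_allocator` and `__is_input_iterator`, mirroring the `list` guides earlier. A combined usage sketch:

```cpp
#include <deque>
#include <functional>
#include <optional>
#include <queue>
#include <vector>

int main() {
    std::optional o{42};                              // optional<int>
    std::queue q(std::deque<int>{1, 2, 3});           // queue<int>
    std::vector<int> v{3, 1, 2};
    std::priority_queue pq(std::greater<int>{}, v);   // min-heap over int
    return o.value() + q.front() + pq.top();          // 42 + 1 + 1
}
```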
priority_queue::value_type, Container, Compare>; // C++17 + +template +priority_queue(Compare, Container, Allocator) + -> priority_queue; // C++17 + template void swap(priority_queue& x, priority_queue& y) @@ -321,6 +341,22 @@ public: operator< (const queue<_T1, _C1>& __x,const queue<_T1, _C1>& __y); }; +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template::value, nullptr_t>::type +> +queue(_Container) + -> queue; + +template::value, nullptr_t>::type, + class = typename enable_if< __is_allocator<_Alloc>::value, nullptr_t>::type +> +queue(_Container, _Alloc) + -> queue; +#endif + template inline _LIBCPP_INLINE_VISIBILITY bool @@ -515,6 +551,36 @@ public: __is_nothrow_swappable::value); }; +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template ::value, nullptr_t>::type, + class = typename enable_if::value, nullptr_t>::type +> +priority_queue(_Compare, _Container) + -> priority_queue; + +template::value_type>, + class _Container = vector::value_type>, + class = typename enable_if< __is_input_iterator<_InputIterator>::value, nullptr_t>::type, + class = typename enable_if::value, nullptr_t>::type, + class = typename enable_if::value, nullptr_t>::type +> +priority_queue(_InputIterator, _InputIterator, _Compare = _Compare(), _Container = _Container()) + -> priority_queue::value_type, _Container, _Compare>; + +template::value, nullptr_t>::type, + class = typename enable_if::value, nullptr_t>::type, + class = typename enable_if< __is_allocator<_Alloc>::value, nullptr_t>::type +> +priority_queue(_Compare, _Container, _Alloc) + -> priority_queue; +#endif + template inline priority_queue<_Tp, _Container, _Compare>::priority_queue(const _Compare& __comp, diff --git a/include/c++/v1/random b/include/c++/v1/random index 89664a6..724bd0f 100644 --- a/include/c++/v1/random +++ b/include/c++/v1/random @@ -2337,7 +2337,7 @@ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, for (size_t __i = 1; __i < __n; ++__i) if (__x_[__i] != 0) return; - __x_[0] = _Max; + __x_[0] = result_type(1) << (__w - 1); } } @@ -2363,7 +2363,7 @@ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, for (size_t __i = 1; __i < __n; ++__i) if (__x_[__i] != 0) return; - __x_[0] = _Max; + __x_[0] = result_type(1) << (__w - 1); } } diff --git a/include/c++/v1/regex b/include/c++/v1/regex index ed0781c..1858500 100644 --- a/include/c++/v1/regex +++ b/include/c++/v1/regex @@ -192,6 +192,11 @@ public: void swap(basic_regex&); }; +template +basic_regex(ForwardIterator, ForwardIterator, + regex_constants::syntax_option_type = regex_constants::ECMAScript) + -> basic_regex::value_type>; // C++17 + typedef basic_regex regex; typedef basic_regex wregex; @@ -764,6 +769,7 @@ typedef regex_token_iterator wsregex_token_iterator; #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -963,7 +969,7 @@ public: }; template -_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY void __throw_regex_error() { #ifndef _LIBCPP_NO_EXCEPTIONS @@ -1346,9 +1352,9 @@ public: virtual ~__node() {} _LIBCPP_INLINE_VISIBILITY - virtual void __exec(__state&) const {}; + virtual void __exec(__state&) const {} _LIBCPP_INLINE_VISIBILITY - virtual void __exec_split(bool, __state&) const {}; + virtual void __exec_split(bool, __state&) const {} }; // __end_state @@ -2409,20 +2415,17 @@ __bracket_expression<_CharT, _Traits>::__exec(__state& __s) const goto __exit; } } - // set of "__found" chars = + // When there's at least one 
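The `<random>` hunk fixes seeding of `mersenne_twister_engine` from a seed sequence: when the generated state comes out all zero, [rand.eng.mers] requires setting the first word to 2^(w-1), whereas the old code stored the engine's `max()` (2^w - 1 for full-width engines). A small check of the injected constant:

```cpp
#include <cstdint>
#include <random>

int main() {
    using E = std::mt19937;                                   // w == 32
    constexpr E::result_type injected = E::result_type(1) << (32 - 1);
    static_assert(injected == 0x80000000u,
                  "2^(w-1), the value [rand.eng.mers] mandates");
    // The previous code injected E::max() (0xFFFFFFFFu) instead.
    return 0;
}
```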
of __neg_chars_ and __neg_mask_, the set + // of "__found" chars is // union(complement(union(__neg_chars_, __neg_mask_)), // other cases...) // - // __neg_chars_ and __neg_mask_'d better be handled together, as there - // are no short circuit opportunities. - // - // In addition, when __neg_mask_/__neg_chars_ is empty, they should be - // treated as all ones/all chars. + // It doesn't make sense to check this when there are no __neg_chars_ + // and no __neg_mask_. + if (!(__neg_mask_ == 0 && __neg_chars_.empty())) { - const bool __in_neg_mask = (__neg_mask_ == 0) || - __traits_.isctype(__ch, __neg_mask_); + const bool __in_neg_mask = __traits_.isctype(__ch, __neg_mask_); const bool __in_neg_chars = - __neg_chars_.empty() || std::find(__neg_chars_.begin(), __neg_chars_.end(), __ch) != __neg_chars_.end(); if (!(__in_neg_mask || __in_neg_chars)) @@ -2922,6 +2925,15 @@ private: template friend class __lookahead; }; +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template ::value, nullptr_t>::type +> +basic_regex(_ForwardIterator, _ForwardIterator, + regex_constants::syntax_option_type = regex_constants::ECMAScript) + -> basic_regex::value_type>; +#endif + template const regex_constants::syntax_option_type basic_regex<_CharT, _Traits>::icase; template diff --git a/include/c++/v1/scoped_allocator b/include/c++/v1/scoped_allocator index 4760d94..bdbb013 100644 --- a/include/c++/v1/scoped_allocator +++ b/include/c++/v1/scoped_allocator @@ -108,6 +108,7 @@ template #include <__config> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/include/c++/v1/set b/include/c++/v1/set index 94db8c7..f2ce6ea 100644 --- a/include/c++/v1/set +++ b/include/c++/v1/set @@ -40,6 +40,8 @@ public: typedef implementation-defined const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; + typedef unspecified node_type; // C++17 + typedef INSERT_RETURN_TYPE insert_return_type; // C++17 // construct/copy/destroy: set() @@ -115,6 +117,11 @@ public: void insert(InputIterator first, InputIterator last); void insert(initializer_list il); + node_type extract(const_iterator position); // C++17 + node_type extract(const key_type& x); // C++17 + insert_return_type insert(node_type&& nh); // C++17 + iterator insert(const_iterator hint, node_type&& nh); // C++17 + iterator erase(const_iterator position); iterator erase(iterator position); // C++14 size_type erase(const key_type& k); @@ -222,6 +229,7 @@ public: typedef implementation-defined const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; + typedef unspecified node_type; // C++17 // construct/copy/destroy: multiset() @@ -297,6 +305,11 @@ public: void insert(InputIterator first, InputIterator last); void insert(initializer_list il); + node_type extract(const_iterator position); // C++17 + node_type extract(const key_type& x); // C++17 + iterator insert(node_type&& nh); // C++17 + iterator insert(const_iterator hint, node_type&& nh); // C++17 + iterator erase(const_iterator position); iterator erase(iterator position); // C++14 size_type erase(const key_type& k); @@ -387,7 +400,9 @@ swap(multiset& x, multiset& y) #include <__config> #include <__tree> +#include <__node_handle> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -429,6 +444,11 @@ public: typedef _VSTD::reverse_iterator reverse_iterator; typedef _VSTD::reverse_iterator const_reverse_iterator; +#if 
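Two regex changes land here: the bracket-expression fix just above stops treating an empty `__neg_mask_`/`__neg_chars_` as "matches everything" and only evaluates the negated sets when at least one is present, and `basic_regex` gains its C++17 iterator-pair deduction guide (constrained on `__is_forward_iterator`). Guide usage:

```cpp
#include <regex>
#include <string>

int main() {
    std::string pat = "[a-z]+";
    std::basic_regex re(pat.begin(), pat.end());   // deduces basic_regex<char>
    return std::regex_match("abc", re) ? 0 : 1;
}
```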
_LIBCPP_STD_VER > 14 + typedef __set_node_handle node_type; + typedef __insert_return_type insert_return_type; +#endif + _LIBCPP_INLINE_VISIBILITY set() _NOEXCEPT_( @@ -634,6 +654,35 @@ public: _LIBCPP_INLINE_VISIBILITY void clear() _NOEXCEPT {__tree_.clear();} +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY + insert_return_type insert(node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to set::insert()"); + return __tree_.template __node_handle_insert_unique< + node_type, insert_return_type>(_VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + iterator insert(const_iterator __hint, node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to set::insert()"); + return __tree_.template __node_handle_insert_unique( + __hint, _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(key_type const& __key) + { + return __tree_.template __node_handle_extract(__key); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(const_iterator __it) + { + return __tree_.template __node_handle_extract(__it); + } +#endif + _LIBCPP_INLINE_VISIBILITY void swap(set& __s) _NOEXCEPT_(__is_nothrow_swappable<__base>::value) {__tree_.swap(__s.__tree_);} @@ -838,6 +887,10 @@ public: typedef _VSTD::reverse_iterator reverse_iterator; typedef _VSTD::reverse_iterator const_reverse_iterator; +#if _LIBCPP_STD_VER > 14 + typedef __set_node_handle node_type; +#endif + // construct/copy/destroy: _LIBCPP_INLINE_VISIBILITY multiset() @@ -1042,6 +1095,35 @@ public: _LIBCPP_INLINE_VISIBILITY void clear() _NOEXCEPT {__tree_.clear();} +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY + iterator insert(node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to multiset::insert()"); + return __tree_.template __node_handle_insert_multi( + _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + iterator insert(const_iterator __hint, node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to multiset::insert()"); + return __tree_.template __node_handle_insert_multi( + __hint, _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(key_type const& __key) + { + return __tree_.template __node_handle_extract(__key); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(const_iterator __it) + { + return __tree_.template __node_handle_extract(__it); + } +#endif + _LIBCPP_INLINE_VISIBILITY void swap(multiset& __s) _NOEXCEPT_(__is_nothrow_swappable<__base>::value) diff --git a/include/c++/v1/shared_mutex b/include/c++/v1/shared_mutex index 9cb8152..fbde0cf 100644 --- a/include/c++/v1/shared_mutex +++ b/include/c++/v1/shared_mutex @@ -124,12 +124,13 @@ template */ #include <__config> +#include _LIBCPP_PUSH_MACROS #include <__undef_macros> -#if _LIBCPP_STD_VER > 11 || defined(_LIBCPP_BUILDING_SHARED_MUTEX) +#if _LIBCPP_STD_VER > 11 || defined(_LIBCPP_BUILDING_LIBRARY) #include <__mutex_base> diff --git a/include/c++/v1/span b/include/c++/v1/span new file mode 100644 index 0000000..ea7aecb --- /dev/null +++ b/include/c++/v1/span @@ -0,0 +1,608 @@ +// -*- C++ -*- +//===------------------------------ span ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois 
Open +// Source Licenses. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP_SPAN +#define _LIBCPP_SPAN + +/* + span synopsis + +namespace std { + +// constants +inline constexpr ptrdiff_t dynamic_extent = -1; + +// [views.span], class template span +template + class span; + +// [span.comparison], span comparison operators +template + constexpr bool operator==(span l, span r); +template + constexpr bool operator!=(span l, span r); +template + constexpr bool operator<(span l, span r); +template + constexpr bool operator<=(span l, span r); +template + constexpr bool operator>(span l, span r); +template + constexpr bool operator>=(span l, span r); + +// [span.objectrep], views of object representation +template + span(sizeof(ElementType)) * Extent))> as_bytes(span s) noexcept; + +template + span< byte, ((Extent == dynamic_extent) ? dynamic_extent : + (static_cast(sizeof(ElementType)) * Extent))> as_writable_bytes(span s) noexcept; + + +namespace std { +template +class span { +public: + // constants and types + using element_type = ElementType; + using value_type = remove_cv_t; + using index_type = ptrdiff_t; + using difference_type = ptrdiff_t; + using pointer = element_type*; + using reference = element_type&; + using iterator = implementation-defined; + using const_iterator = implementation-defined; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + static constexpr index_type extent = Extent; + + // [span.cons], span constructors, copy, assignment, and destructor + constexpr span() noexcept; + constexpr span(pointer ptr, index_type count); + constexpr span(pointer firstElem, pointer lastElem); + template + constexpr span(element_type (&arr)[N]) noexcept; + template + constexpr span(array& arr) noexcept; + template + constexpr span(const array& arr) noexcept; + template + constexpr span(Container& cont); + template + constexpr span(const Container& cont); + constexpr span(const span& other) noexcept = default; + template + constexpr span(const span& s) noexcept; + ~span() noexcept = default; + constexpr span& operator=(const span& other) noexcept = default; + + // [span.sub], span subviews + template + constexpr span first() const; + template + constexpr span last() const; + template + constexpr span subspan() const; + + constexpr span first(index_type count) const; + constexpr span last(index_type count) const; + constexpr span subspan(index_type offset, index_type count = dynamic_extent) const; + + // [span.obs], span observers + constexpr index_type size() const noexcept; + constexpr index_type size_bytes() const noexcept; + constexpr bool empty() const noexcept; + + // [span.elem], span element access + constexpr reference operator[](index_type idx) const; + constexpr reference operator()(index_type idx) const; + constexpr pointer data() const noexcept; + + // [span.iterators], span iterator support + constexpr iterator begin() const noexcept; + constexpr iterator end() const noexcept; + constexpr const_iterator cbegin() const noexcept; + constexpr const_iterator cend() const noexcept; + constexpr reverse_iterator rbegin() const noexcept; + constexpr reverse_iterator rend() const noexcept; + constexpr const_reverse_iterator crbegin() const noexcept; + constexpr const_reverse_iterator crend() const noexcept; + +private: + pointer data_; // exposition only + index_type size_; // exposition only +}; + +template + span(T (&)[N]) -> span; + +template + 
+
+#include <__config>
+#include <cstddef>      // for ptrdiff_t
+#include <iterator>     // for iterators
+#include <array>        // for array
+#include <type_traits>  // for remove_cv, etc
+#include <cstddef>      // for byte
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+inline constexpr ptrdiff_t dynamic_extent = -1;
+template <typename _Tp, ptrdiff_t _Extent = dynamic_extent> class span;
+
+
+template <class _Tp>
+struct __is_span_impl : public false_type {};
+
+template <class _Tp, ptrdiff_t _Extent>
+struct __is_span_impl<span<_Tp, _Extent>> : public true_type {};
+
+template <class _Tp>
+struct __is_span : public __is_span_impl<remove_cv_t<_Tp>> {};
+
+template <class _Tp>
+struct __is_std_array_impl : public false_type {};
+
+template <class _Tp, size_t _Sz>
+struct __is_std_array_impl<array<_Tp, _Sz>> : public true_type {};
+
+template <class _Tp>
+struct __is_std_array : public __is_std_array_impl<remove_cv_t<_Tp>> {};
+
+template <class _Tp, class _ElementType, class = void>
+struct __is_span_compatible_container : public false_type {};
+
+template <class _Tp, class _ElementType>
+struct __is_span_compatible_container<_Tp, _ElementType,
+        void_t<
+        // is not a specialization of span
+            typename enable_if<!__is_span<_Tp>::value, nullptr_t>::type,
+        // is not a specialization of array
+            typename enable_if<!__is_std_array<_Tp>::value, nullptr_t>::type,
+        // is_array_v<Container> is false,
+            typename enable_if<!is_array_v<_Tp>, nullptr_t>::type,
+        // data(cont) and size(cont) are well formed
+            decltype(data(declval<_Tp>())),
+            decltype(size(declval<_Tp>())),
+        // remove_pointer_t<decltype(data(cont))>(*)[] is convertible to ElementType(*)[]
+            typename enable_if<
+                is_convertible_v<remove_pointer_t<decltype(data(declval<_Tp &>()))>(*)[],
+                                 _ElementType(*)[]>,
+                nullptr_t>::type
+        >>
+    : public true_type {};
+
+
+template <typename _Tp, ptrdiff_t _Extent>
+class _LIBCPP_TEMPLATE_VIS span {
+public:
+//  constants and types
+    using element_type           = _Tp;
+    using value_type             = remove_cv_t<_Tp>;
+    using index_type             = ptrdiff_t;
+    using difference_type        = ptrdiff_t;
+    using pointer                = _Tp *;
+    using const_pointer          = const _Tp *; // not in standard
+    using reference              = _Tp &;
+    using const_reference        = const _Tp &; // not in standard
+    using iterator               = __wrap_iter<pointer>;
+    using const_iterator         = __wrap_iter<const_pointer>;
+    using reverse_iterator       = _VSTD::reverse_iterator<iterator>;
+    using const_reverse_iterator = _VSTD::reverse_iterator<const_iterator>;
+
+    static constexpr index_type extent = _Extent;
+    static_assert (_Extent >= 0, "Can't have a span with an extent < 0");
+
+// [span.cons], span constructors, copy, assignment, and destructor
+    _LIBCPP_INLINE_VISIBILITY constexpr span() noexcept : __data{nullptr}
+        { static_assert(_Extent == 0, "Can't default construct a statically sized span with size > 0"); }
+
+    constexpr span           (const span&) noexcept = default;
+    constexpr span& operator=(const span&) noexcept = default;
+
+    _LIBCPP_INLINE_VISIBILITY constexpr span(pointer __ptr, index_type __count) : __data{__ptr}
+        { (void)__count; _LIBCPP_ASSERT(_Extent == __count, "size mismatch in span's constructor (ptr, len)"); }
+    _LIBCPP_INLINE_VISIBILITY constexpr span(pointer __f, pointer __l) : __data{__f}
+        { (void)__l; _LIBCPP_ASSERT(_Extent == distance(__f, __l), "size mismatch in span's constructor (ptr, ptr)"); }
+
+    _LIBCPP_INLINE_VISIBILITY constexpr span(element_type (&__arr)[_Extent]) noexcept : __data{__arr} {}
+    _LIBCPP_INLINE_VISIBILITY constexpr span(array<value_type, _Extent>& __arr) noexcept : __data{__arr.data()} {}
+    _LIBCPP_INLINE_VISIBILITY constexpr span(const array<value_type, _Extent>& __arr) noexcept : __data{__arr.data()} {}
+
+    template <class _Container>
+    inline _LIBCPP_INLINE_VISIBILITY
+        constexpr span(      _Container& __c,
+            enable_if_t<__is_span_compatible_container<_Container, _Tp>::value, nullptr_t> = nullptr)
+        :
__data{_VSTD::data(__c)} + { _LIBCPP_ASSERT(_Extent == _VSTD::size(__c), "size mismatch in span's constructor (container)"); } + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span(const _Container& __c, + enable_if_t<__is_span_compatible_container::value, nullptr_t> = nullptr) + : __data{_VSTD::data(__c)} + { _LIBCPP_ASSERT(_Extent == _VSTD::size(__c), "size mismatch in span's constructor (const container)"); } + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span(const span<_OtherElementType, _Extent>& __other, + enable_if_t< + is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, + nullptr_t> = nullptr) + : __data{__other.data()} {} + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span(const span<_OtherElementType, dynamic_extent>& __other, + enable_if_t< + is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, + nullptr_t> = nullptr) noexcept + : __data{__other.data()} { _LIBCPP_ASSERT(_Extent == __other.size(), "size mismatch in span's constructor (other span)"); } + + +// ~span() noexcept = default; + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span first() const noexcept + { + static_assert(_Count >= 0, "Count must be >= 0 in span::first()"); + static_assert(_Count <= _Extent, "Count out of range in span::first()"); + return {data(), _Count}; + } + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span last() const noexcept + { + static_assert(_Count >= 0, "Count must be >= 0 in span::last()"); + static_assert(_Count <= _Extent, "Count out of range in span::last()"); + return {data() + size() - _Count, _Count}; + } + + _LIBCPP_INLINE_VISIBILITY + constexpr span first(index_type __count) const noexcept + { + _LIBCPP_ASSERT(__count >= 0 && __count <= size(), "Count out of range in span::first(count)"); + return {data(), __count}; + } + + _LIBCPP_INLINE_VISIBILITY + constexpr span last(index_type __count) const noexcept + { + _LIBCPP_ASSERT(__count >= 0 && __count <= size(), "Count out of range in span::last(count)"); + return {data() + size() - __count, __count}; + } + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr auto subspan() const noexcept + -> span + { + _LIBCPP_ASSERT(_Offset >= 0 && _Offset <= size(), "Offset out of range in span::subspan()"); + return {data() + _Offset, _Count == dynamic_extent ? 
size() - _Offset : _Count}; + } + + + inline _LIBCPP_INLINE_VISIBILITY + constexpr span + subspan(index_type __offset, index_type __count = dynamic_extent) const noexcept + { + _LIBCPP_ASSERT( __offset >= 0 && __offset <= size(), "Offset out of range in span::subspan(offset, count)"); + _LIBCPP_ASSERT((__count >= 0 && __count <= size()) || __count == dynamic_extent, "Count out of range in span::subspan(offset, count)"); + if (__count == dynamic_extent) + return {data() + __offset, size() - __offset}; + _LIBCPP_ASSERT(__offset + __count <= size(), "count + offset out of range in span::subspan(offset, count)"); + return {data() + __offset, __count}; + } + + _LIBCPP_INLINE_VISIBILITY constexpr index_type size() const noexcept { return _Extent; } + _LIBCPP_INLINE_VISIBILITY constexpr index_type size_bytes() const noexcept { return _Extent * sizeof(element_type); } + _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return _Extent == 0; } + + _LIBCPP_INLINE_VISIBILITY constexpr reference operator[](index_type __idx) const noexcept + { + _LIBCPP_ASSERT(__idx >= 0 && __idx < size(), "span[] index out of bounds"); + return __data[__idx]; + } + + _LIBCPP_INLINE_VISIBILITY constexpr reference operator()(index_type __idx) const noexcept + { + _LIBCPP_ASSERT(__idx >= 0 && __idx < size(), "span() index out of bounds"); + return __data[__idx]; + } + + _LIBCPP_INLINE_VISIBILITY constexpr pointer data() const noexcept { return __data; } + +// [span.iter], span iterator support + _LIBCPP_INLINE_VISIBILITY constexpr iterator begin() const noexcept { return iterator(data()); } + _LIBCPP_INLINE_VISIBILITY constexpr iterator end() const noexcept { return iterator(data() + size()); } + _LIBCPP_INLINE_VISIBILITY constexpr const_iterator cbegin() const noexcept { return const_iterator(data()); } + _LIBCPP_INLINE_VISIBILITY constexpr const_iterator cend() const noexcept { return const_iterator(data() + size()); } + _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } + _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } + _LIBCPP_INLINE_VISIBILITY constexpr const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(cend()); } + _LIBCPP_INLINE_VISIBILITY constexpr const_reverse_iterator crend() const noexcept { return const_reverse_iterator(cbegin()); } + + _LIBCPP_INLINE_VISIBILITY constexpr void swap(span &__other) noexcept + { + pointer __p = __data; + __data = __other.__data; + __other.__data = __p; + } + + _LIBCPP_INLINE_VISIBILITY span __as_bytes() const noexcept + { return {reinterpret_cast(data()), size_bytes()}; } + + _LIBCPP_INLINE_VISIBILITY span __as_writeable_bytes() const noexcept + { return {reinterpret_cast(data()), size_bytes()}; } + +private: + pointer __data; + +}; + + +template +class _LIBCPP_TEMPLATE_VIS span<_Tp, dynamic_extent> { +private: + +public: +// constants and types + using element_type = _Tp; + using value_type = remove_cv_t<_Tp>; + using index_type = ptrdiff_t; + using difference_type = ptrdiff_t; + using pointer = _Tp *; + using const_pointer = const _Tp *; // not in standard + using reference = _Tp &; + using const_reference = const _Tp &; // not in standard + using iterator = __wrap_iter; + using const_iterator = __wrap_iter; + using reverse_iterator = _VSTD::reverse_iterator; + using const_reverse_iterator = _VSTD::reverse_iterator; + + static constexpr index_type extent = dynamic_extent; + +// [span.cons], span 
constructors, copy, assignment, and destructor + _LIBCPP_INLINE_VISIBILITY constexpr span() noexcept : __data{nullptr}, __size{0} {} + + constexpr span (const span&) noexcept = default; + constexpr span& operator=(const span&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY constexpr span(pointer __ptr, index_type __count) : __data{__ptr}, __size{__count} {} + _LIBCPP_INLINE_VISIBILITY constexpr span(pointer __f, pointer __l) : __data{__f}, __size{distance(__f, __l)} {} + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span(element_type (&__arr)[_Sz]) noexcept : __data{__arr}, __size{_Sz} {} + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span(array& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {} + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span(const array& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {} + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span( _Container& __c, + enable_if_t<__is_span_compatible_container<_Container, _Tp>::value, nullptr_t> = nullptr) + : __data{_VSTD::data(__c)}, __size{(index_type) _VSTD::size(__c)} {} + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span(const _Container& __c, + enable_if_t<__is_span_compatible_container::value, nullptr_t> = nullptr) + : __data{_VSTD::data(__c)}, __size{(index_type) _VSTD::size(__c)} {} + + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span(const span<_OtherElementType, _OtherExtent>& __other, + enable_if_t< + is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, + nullptr_t> = nullptr) noexcept + : __data{__other.data()}, __size{__other.size()} {} + +// ~span() noexcept = default; + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span first() const noexcept + { + static_assert(_Count >= 0, "Count must be >= 0 in span::first()"); + _LIBCPP_ASSERT(_Count <= size(), "Count out of range in span::first()"); + return {data(), _Count}; + } + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span last() const noexcept + { + static_assert(_Count >= 0, "Count must be >= 0 in span::last()"); + _LIBCPP_ASSERT(_Count <= size(), "Count out of range in span::last()"); + return {data() + size() - _Count, _Count}; + } + + _LIBCPP_INLINE_VISIBILITY + constexpr span first(index_type __count) const noexcept + { + _LIBCPP_ASSERT(__count >= 0 && __count <= size(), "Count out of range in span::first(count)"); + return {data(), __count}; + } + + _LIBCPP_INLINE_VISIBILITY + constexpr span last (index_type __count) const noexcept + { + _LIBCPP_ASSERT(__count >= 0 && __count <= size(), "Count out of range in span::last(count)"); + return {data() + size() - __count, __count}; + } + + template + inline _LIBCPP_INLINE_VISIBILITY + constexpr span<_Tp, dynamic_extent> subspan() const noexcept + { + _LIBCPP_ASSERT(_Offset >= 0 && _Offset <= size(), "Offset out of range in span::subspan()"); + _LIBCPP_ASSERT(_Count == dynamic_extent || _Offset + _Count <= size(), "Count out of range in span::subspan()"); + return {data() + _Offset, _Count == dynamic_extent ? 
size() - _Offset : _Count}; + } + + constexpr span + inline _LIBCPP_INLINE_VISIBILITY + subspan(index_type __offset, index_type __count = dynamic_extent) const noexcept + { + _LIBCPP_ASSERT( __offset >= 0 && __offset <= size(), "Offset out of range in span::subspan(offset, count)"); + _LIBCPP_ASSERT((__count >= 0 && __count <= size()) || __count == dynamic_extent, "count out of range in span::subspan(offset, count)"); + if (__count == dynamic_extent) + return {data() + __offset, size() - __offset}; + _LIBCPP_ASSERT(__offset + __count <= size(), "Offset + count out of range in span::subspan(offset, count)"); + return {data() + __offset, __count}; + } + + _LIBCPP_INLINE_VISIBILITY constexpr index_type size() const noexcept { return __size; } + _LIBCPP_INLINE_VISIBILITY constexpr index_type size_bytes() const noexcept { return __size * sizeof(element_type); } + _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return __size == 0; } + + _LIBCPP_INLINE_VISIBILITY constexpr reference operator[](index_type __idx) const noexcept + { + _LIBCPP_ASSERT(__idx >= 0 && __idx < size(), "span[] index out of bounds"); + return __data[__idx]; + } + + _LIBCPP_INLINE_VISIBILITY constexpr reference operator()(index_type __idx) const noexcept + { + _LIBCPP_ASSERT(__idx >= 0 && __idx < size(), "span() index out of bounds"); + return __data[__idx]; + } + + _LIBCPP_INLINE_VISIBILITY constexpr pointer data() const noexcept { return __data; } + +// [span.iter], span iterator support + _LIBCPP_INLINE_VISIBILITY constexpr iterator begin() const noexcept { return iterator(data()); } + _LIBCPP_INLINE_VISIBILITY constexpr iterator end() const noexcept { return iterator(data() + size()); } + _LIBCPP_INLINE_VISIBILITY constexpr const_iterator cbegin() const noexcept { return const_iterator(data()); } + _LIBCPP_INLINE_VISIBILITY constexpr const_iterator cend() const noexcept { return const_iterator(data() + size()); } + _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } + _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } + _LIBCPP_INLINE_VISIBILITY constexpr const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(cend()); } + _LIBCPP_INLINE_VISIBILITY constexpr const_reverse_iterator crend() const noexcept { return const_reverse_iterator(cbegin()); } + + _LIBCPP_INLINE_VISIBILITY constexpr void swap(span &__other) noexcept + { + pointer __p = __data; + __data = __other.__data; + __other.__data = __p; + + index_type __sz = __size; + __size = __other.__size; + __other.__size = __sz; + } + + _LIBCPP_INLINE_VISIBILITY span __as_bytes() const noexcept + { return {reinterpret_cast(data()), size_bytes()}; } + + _LIBCPP_INLINE_VISIBILITY span __as_writeable_bytes() const noexcept + { return {reinterpret_cast(data()), size_bytes()}; } + +private: + pointer __data; + index_type __size; +}; + +template + constexpr bool + operator==(const span<_Tp1, _Extent1>& __lhs, const span<_Tp2, _Extent2>& __rhs) + { return equal(__lhs.begin(), __lhs.end(), __rhs.begin(), __rhs.end()); } + +template + constexpr bool + operator!=(const span<_Tp1, _Extent1>& __lhs, const span<_Tp2, _Extent2>& __rhs) + { return !(__rhs == __lhs); } + +template + constexpr bool + operator< (const span<_Tp1, _Extent1>& __lhs, const span<_Tp2, _Extent2>& __rhs) + { return lexicographical_compare (__lhs.begin(), __lhs.end(), __rhs.begin(), __rhs.end()); } + +template + constexpr bool + operator<=(const 
span<_Tp1, _Extent1>& __lhs, const span<_Tp2, _Extent2>& __rhs)
+    { return !(__rhs < __lhs); }
+
+template <class _Tp1, ptrdiff_t _Extent1, class _Tp2, ptrdiff_t _Extent2>
+    constexpr bool
+    operator> (const span<_Tp1, _Extent1>& __lhs, const span<_Tp2, _Extent2>& __rhs)
+    { return __rhs < __lhs; }
+
+template <class _Tp1, ptrdiff_t _Extent1, class _Tp2, ptrdiff_t _Extent2>
+    constexpr bool
+    operator>=(const span<_Tp1, _Extent1>& __lhs, const span<_Tp2, _Extent2>& __rhs)
+    { return !(__lhs < __rhs); }
+
+//  as_bytes & as_writeable_bytes
+template <class _Tp, ptrdiff_t _Extent>
+    auto as_bytes(span<_Tp, _Extent> __s) noexcept
+    -> decltype(__s.__as_bytes())
+    { return __s.__as_bytes(); }
+
+template <class _Tp, ptrdiff_t _Extent>
+    auto as_writeable_bytes(span<_Tp, _Extent> __s) noexcept
+    -> typename enable_if<!is_const_v<_Tp>, decltype(__s.__as_writeable_bytes())>::type
+    { return __s.__as_writeable_bytes(); }
+
+template <class _Tp, ptrdiff_t _Extent>
+    constexpr void swap(span<_Tp, _Extent> &__lhs, span<_Tp, _Extent> &__rhs) noexcept
+    { __lhs.swap(__rhs); }
+
+
+//  Deduction guides
+template <class _Tp, size_t _Sz>
+    span(_Tp (&)[_Sz]) -> span<_Tp, _Sz>;
+
+template <class _Tp, size_t _Sz>
+    span(array<_Tp, _Sz>&) -> span<_Tp, _Sz>;
+
+template <class _Tp, size_t _Sz>
+    span(const array<_Tp, _Sz>&) -> span<const _Tp, _Sz>;
+
+template <class _Container>
+    span(_Container&) -> span<typename _Container::value_type>;
+
+template <class _Container>
+    span(const _Container&) -> span<const typename _Container::value_type>;
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_SPAN
diff --git a/include/c++/v1/sstream b/include/c++/v1/sstream
index b01f47b..9c3ee13 100644
--- a/include/c++/v1/sstream
+++ b/include/c++/v1/sstream
@@ -473,12 +473,12 @@ basic_stringbuf<_CharT, _Traits, _Allocator>::str(const string_type& __s)
     {
         while (__sz > INT_MAX)
         {
-        this->pbump(INT_MAX);
-        __sz -= INT_MAX;
+            this->pbump(INT_MAX);
+            __sz -= INT_MAX;
         }
         if (__sz > 0)
-        this->pbump(__sz);
-    }
+            this->pbump(__sz);
+        }
     }
 }
diff --git a/include/c++/v1/stack b/include/c++/v1/stack
index 7b81ade..2b3f8ae 100644
--- a/include/c++/v1/stack
+++ b/include/c++/v1/stack
@@ -61,6 +61,12 @@ public:
     void swap(stack& c) noexcept(is_nothrow_swappable_v<Container>)
 };

+template <class Container>
+    stack(Container) -> stack<typename Container::value_type, Container>; // C++17
+
+template <class Container, class Allocator>
+    stack(Container, Allocator) -> stack<typename Container::value_type, Container>; // C++17
+
 template <class T, class Container>
     bool operator==(const stack<T, Container>& x, const stack<T, Container>& y);
 template <class T, class Container>
@@ -229,6 +235,22 @@ public:
     operator< (const stack<_T1, _C1>& __x, const stack<_T1, _C1>& __y);
 };

+#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+template<class _Container,
+         class = typename enable_if<!__is_allocator<_Container>::value, nullptr_t>::type
+>
+stack(_Container)
+    -> stack<typename _Container::value_type, _Container>;
+
+template<class _Container,
+         class _Alloc,
+         class = typename enable_if<!__is_allocator<_Container>::value, nullptr_t>::type,
+         class = typename enable_if< __is_allocator<_Alloc>::value, nullptr_t>::type
+         >
+stack(_Container, _Alloc)
+    -> stack<typename _Container::value_type, _Container>;
+#endif
+
 template <class _Tp, class _Container>
 inline _LIBCPP_INLINE_VISIBILITY
 bool
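The stack hunks above add class template argument deduction, with the `__is_allocator` constraints keeping an allocator argument from being deduced as the container. A quick sketch of the resulting behavior (standard C++17 CTAD, not specific to this implementation):

```cpp
#include <deque>
#include <memory>
#include <stack>

int main() {
    std::deque<int> d{1, 2, 3};

    // First guide: deduces std::stack<int, std::deque<int>>.
    std::stack s1{d};

    // Second guide: the allocator is recognized as such, so the
    // container type is still deduced from d, not from the allocator.
    std::stack s2{d, std::allocator<int>()};

    return static_cast<int>(s1.size() + s2.size());
}
```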
diff --git a/include/c++/v1/stddef.h b/include/c++/v1/stddef.h
index faf8552..f65065d 100644
--- a/include/c++/v1/stddef.h
+++ b/include/c++/v1/stddef.h
@@ -54,7 +54,7 @@ using std::nullptr_t;

 // Re-use the compiler's max_align_t where possible.
 #if !defined(__CLANG_MAX_ALIGN_T_DEFINED) && !defined(_GCC_MAX_ALIGN_T) && \
-    !defined(__DEFINED_max_align_t)
+    !defined(__DEFINED_max_align_t) && !defined(__NetBSD__)
 typedef long double max_align_t;
 #endif
diff --git a/include/c++/v1/stdexcept b/include/c++/v1/stdexcept
index 95a96cc..3ec7934 100644
--- a/include/c++/v1/stdexcept
+++ b/include/c++/v1/stdexcept
@@ -185,73 +185,73 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // in the dylib
 _LIBCPP_NORETURN _LIBCPP_FUNC_VIS void __throw_runtime_error(const char*);

-_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
 void __throw_logic_error(const char*__msg)
 {
 #ifndef _LIBCPP_NO_EXCEPTIONS
     throw logic_error(__msg);
 #else
     ((void)__msg);
-	_VSTD::abort();
+    _VSTD::abort();
 #endif
 }

-_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
 void __throw_domain_error(const char*__msg)
 {
 #ifndef _LIBCPP_NO_EXCEPTIONS
     throw domain_error(__msg);
 #else
     ((void)__msg);
-	_VSTD::abort();
+    _VSTD::abort();
 #endif
 }

-_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
 void __throw_invalid_argument(const char*__msg)
 {
 #ifndef _LIBCPP_NO_EXCEPTIONS
     throw invalid_argument(__msg);
 #else
     ((void)__msg);
-	_VSTD::abort();
+    _VSTD::abort();
 #endif
 }

-_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
 void __throw_length_error(const char*__msg)
 {
 #ifndef _LIBCPP_NO_EXCEPTIONS
     throw length_error(__msg);
 #else
     ((void)__msg);
-	_VSTD::abort();
+    _VSTD::abort();
 #endif
 }

-_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
 void __throw_out_of_range(const char*__msg)
 {
 #ifndef _LIBCPP_NO_EXCEPTIONS
     throw out_of_range(__msg);
 #else
     ((void)__msg);
-	_VSTD::abort();
+    _VSTD::abort();
 #endif
 }

-_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
 void __throw_range_error(const char*__msg)
 {
 #ifndef _LIBCPP_NO_EXCEPTIONS
     throw range_error(__msg);
 #else
     ((void)__msg);
-	_VSTD::abort();
+    _VSTD::abort();
 #endif
 }

-_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
 void __throw_overflow_error(const char*__msg)
 {
 #ifndef _LIBCPP_NO_EXCEPTIONS
@@ -262,7 +262,7 @@ void __throw_overflow_error(const char*__msg)
 #endif
 }

-_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
 void __throw_underflow_error(const char*__msg)
 {
 #ifndef _LIBCPP_NO_EXCEPTIONS
diff --git a/include/c++/v1/streambuf b/include/c++/v1/streambuf
index e409d21..37a6532 100644
--- a/include/c++/v1/streambuf
+++ b/include/c++/v1/streambuf
@@ -236,9 +236,9 @@ protected:
     void swap(basic_streambuf& __rhs);

     // 27.6.2.3.2 Get area:
-    _LIBCPP_ALWAYS_INLINE char_type* eback() const {return __binp_;}
-    _LIBCPP_ALWAYS_INLINE char_type* gptr()  const {return __ninp_;}
-    _LIBCPP_ALWAYS_INLINE char_type* egptr() const {return __einp_;}
+    _LIBCPP_INLINE_VISIBILITY char_type* eback() const {return __binp_;}
+    _LIBCPP_INLINE_VISIBILITY char_type* gptr()  const {return __ninp_;}
+    _LIBCPP_INLINE_VISIBILITY char_type* egptr() const {return __einp_;}

     inline _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY
     void gbump(int __n) { __ninp_ += __n; }
@@ -251,14 +251,14 @@ protected:
     }

     // 27.6.2.3.3 Put area:
-    _LIBCPP_ALWAYS_INLINE char_type* pbase() const {return __bout_;}
-    _LIBCPP_ALWAYS_INLINE char_type* pptr()  const {return __nout_;}
-    _LIBCPP_ALWAYS_INLINE char_type* epptr() const {return __eout_;}
+
_LIBCPP_INLINE_VISIBILITY char_type* pbase() const {return __bout_;} + _LIBCPP_INLINE_VISIBILITY char_type* pptr() const {return __nout_;} + _LIBCPP_INLINE_VISIBILITY char_type* epptr() const {return __eout_;} inline _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY void pbump(int __n) { __nout_ += __n; } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void __pbump(streamsize __n) { __nout_ += __n; } inline _LIBCPP_EXTERN_TEMPLATE_INLINE_VISIBILITY diff --git a/include/c++/v1/string b/include/c++/v1/string index 7218aa2..c79164c 100644 --- a/include/c++/v1/string +++ b/include/c++/v1/string @@ -104,7 +104,8 @@ public: const Allocator& a = Allocator()); template basic_string(const T& t, size_type pos, size_type n, const Allocator& a = Allocator()); // C++17 - explicit basic_string(const basic_string_view sv, const Allocator& a = Allocator()); + template + explicit basic_string(const T& t, const Allocator& a = Allocator()); // C++17 basic_string(const value_type* s, const allocator_type& a = allocator_type()); basic_string(const value_type* s, size_type n, const allocator_type& a = allocator_type()); basic_string(size_type n, value_type c, const allocator_type& a = allocator_type()); @@ -120,7 +121,8 @@ public: operator basic_string_view() const noexcept; basic_string& operator=(const basic_string& str); - basic_string& operator=(basic_string_view sv); + template + basic_string& operator=(const T& t); // C++17 basic_string& operator=(basic_string&& str) noexcept( allocator_type::propagate_on_container_move_assignment::value || @@ -164,13 +166,15 @@ public: reference at(size_type n); basic_string& operator+=(const basic_string& str); - basic_string& operator+=(basic_string_view sv); + template + basic_string& operator+=(const T& t); // C++17 basic_string& operator+=(const value_type* s); basic_string& operator+=(value_type c); basic_string& operator+=(initializer_list); basic_string& append(const basic_string& str); - basic_string& append(basic_string_view sv); + template + basic_string& append(const T& t); // C++17 basic_string& append(const basic_string& str, size_type pos, size_type n=npos); //C++14 template basic_string& append(const T& t, size_type pos, size_type n=npos); // C++17 @@ -189,7 +193,8 @@ public: const_reference back() const; basic_string& assign(const basic_string& str); - basic_string& assign(basic_string_view sv); + template + basic_string& assign(const T& t); // C++17 basic_string& assign(basic_string&& str); basic_string& assign(const basic_string& str, size_type pos, size_type n=npos); // C++14 template @@ -202,7 +207,8 @@ public: basic_string& assign(initializer_list); basic_string& insert(size_type pos1, const basic_string& str); - basic_string& insert(size_type pos1, basic_string_view sv); + template + basic_string& insert(size_type pos1, const T& t); basic_string& insert(size_type pos1, const basic_string& str, size_type pos2, size_type n); template @@ -221,7 +227,8 @@ public: iterator erase(const_iterator first, const_iterator last); basic_string& replace(size_type pos1, size_type n1, const basic_string& str); - basic_string& replace(size_type pos1, size_type n1, basic_string_view sv); + template + basic_string& replace(size_type pos1, size_type n1, const T& t); // C++17 basic_string& replace(size_type pos1, size_type n1, const basic_string& str, size_type pos2, size_type n2=npos); // C++14 template @@ -231,7 +238,8 @@ public: basic_string& replace(size_type pos, size_type n1, const value_type* s); basic_string& replace(size_type pos, size_type n1, size_type n2, 
value_type c); basic_string& replace(const_iterator i1, const_iterator i2, const basic_string& str); - basic_string& replace(const_iterator i1, const_iterator i2, basic_string_view sv); + template + basic_string& replace(const_iterator i1, const_iterator i2, const T& t); // C++17 basic_string& replace(const_iterator i1, const_iterator i2, const value_type* s, size_type n); basic_string& replace(const_iterator i1, const_iterator i2, const value_type* s); basic_string& replace(const_iterator i1, const_iterator i2, size_type n, value_type c); @@ -253,45 +261,53 @@ public: allocator_type get_allocator() const noexcept; size_type find(const basic_string& str, size_type pos = 0) const noexcept; - size_type find(basic_string_view sv, size_type pos = 0) const noexcept; + template + size_type find(const T& t, size_type pos = 0) const; // C++17 size_type find(const value_type* s, size_type pos, size_type n) const noexcept; size_type find(const value_type* s, size_type pos = 0) const noexcept; size_type find(value_type c, size_type pos = 0) const noexcept; size_type rfind(const basic_string& str, size_type pos = npos) const noexcept; - size_type rfind(basic_string_view sv, size_type pos = npos) const noexcept; + template + size_type rfind(const T& t, size_type pos = npos) const; // C++17 size_type rfind(const value_type* s, size_type pos, size_type n) const noexcept; size_type rfind(const value_type* s, size_type pos = npos) const noexcept; size_type rfind(value_type c, size_type pos = npos) const noexcept; size_type find_first_of(const basic_string& str, size_type pos = 0) const noexcept; - size_type find_first_of(basic_string_view sv, size_type pos = 0) const noexcept; + template + size_type find_first_of(const T& t, size_type pos = 0) const; // C++17 size_type find_first_of(const value_type* s, size_type pos, size_type n) const noexcept; size_type find_first_of(const value_type* s, size_type pos = 0) const noexcept; size_type find_first_of(value_type c, size_type pos = 0) const noexcept; size_type find_last_of(const basic_string& str, size_type pos = npos) const noexcept; - size_type find_last_of(basic_string_view sv, size_type pos = npos) const noexcept; + template + size_type find_last_of(const T& t, size_type pos = npos) const noexcept; // C++17 size_type find_last_of(const value_type* s, size_type pos, size_type n) const noexcept; size_type find_last_of(const value_type* s, size_type pos = npos) const noexcept; size_type find_last_of(value_type c, size_type pos = npos) const noexcept; size_type find_first_not_of(const basic_string& str, size_type pos = 0) const noexcept; - size_type find_first_not_of(basic_string_view sv, size_type pos = 0) const noexcept; + template + size_type find_first_not_of(const T& t, size_type pos = 0) const; // C++17 size_type find_first_not_of(const value_type* s, size_type pos, size_type n) const noexcept; size_type find_first_not_of(const value_type* s, size_type pos = 0) const noexcept; size_type find_first_not_of(value_type c, size_type pos = 0) const noexcept; size_type find_last_not_of(const basic_string& str, size_type pos = npos) const noexcept; - size_type find_last_not_of(basic_string_view sv, size_type pos = npos) const noexcept; + template + size_type find_last_not_of(const T& t, size_type pos = npos) const; // C++17 size_type find_last_not_of(const value_type* s, size_type pos, size_type n) const noexcept; size_type find_last_not_of(const value_type* s, size_type pos = npos) const noexcept; size_type find_last_not_of(value_type c, size_type pos = npos) 
const noexcept; int compare(const basic_string& str) const noexcept; - int compare(basic_string_view sv) const noexcept; + template + int compare(const T& t) const noexcept; // C++17 int compare(size_type pos1, size_type n1, const basic_string& str) const; - int compare(size_type pos1, size_type n1, basic_string_view sv) const; + template + int compare(size_type pos1, size_type n1, const T& t) const; // C++17 int compare(size_type pos1, size_type n1, const basic_string& str, size_type pos2, size_type n2=npos) const; // C++14 template @@ -494,6 +510,7 @@ basic_string operator "" s( const char32_t *str, size_t len ); // C++1 #include #include #include <__functional_base> +#include #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS #include #endif @@ -665,6 +682,7 @@ public: "traits_type::char_type must be the same type as CharT"); static_assert(( is_same::value), "Allocator::value_type must be same type as value_type"); + #if defined(_LIBCPP_RAW_ITERATORS) typedef pointer iterator; typedef const_pointer const_iterator; @@ -788,31 +806,51 @@ public: _LIBCPP_INLINE_VISIBILITY basic_string(basic_string&& __str, const allocator_type& __a); #endif // _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY basic_string(const _CharT* __s); + + template ::value, nullptr_t>::type> _LIBCPP_INLINE_VISIBILITY - basic_string(const _CharT* __s, const _Allocator& __a); + basic_string(const _CharT* __s) { + _LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*) detected nullptr"); + __init(__s, traits_type::length(__s)); +# if _LIBCPP_DEBUG_LEVEL >= 2 + __get_db()->__insert_c(this); +# endif + } + + template ::value, nullptr_t>::type> + _LIBCPP_INLINE_VISIBILITY + basic_string(const _CharT* __s, const _Allocator& __a); + _LIBCPP_INLINE_VISIBILITY basic_string(const _CharT* __s, size_type __n); _LIBCPP_INLINE_VISIBILITY basic_string(const _CharT* __s, size_type __n, const _Allocator& __a); _LIBCPP_INLINE_VISIBILITY basic_string(size_type __n, _CharT __c); - _LIBCPP_INLINE_VISIBILITY - basic_string(size_type __n, _CharT __c, const _Allocator& __a); + + template ::value, nullptr_t>::type> + _LIBCPP_INLINE_VISIBILITY + basic_string(size_type __n, _CharT __c, const _Allocator& __a); + basic_string(const basic_string& __str, size_type __pos, size_type __n, const _Allocator& __a = _Allocator()); _LIBCPP_INLINE_VISIBILITY basic_string(const basic_string& __str, size_type __pos, const _Allocator& __a = _Allocator()); - template + + template::value, void>::type> _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS basic_string(const _Tp& __t, size_type __pos, size_type __n, - const allocator_type& __a = allocator_type(), - typename enable_if<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, void>::type* = 0); - _LIBCPP_INLINE_VISIBILITY explicit - basic_string(__self_view __sv); - _LIBCPP_INLINE_VISIBILITY - basic_string(__self_view __sv, const _Allocator& __a); + const allocator_type& __a = allocator_type()); + + template::value, void>::type> + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + explicit basic_string(const _Tp& __t); + + template::value, void>::type> + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + explicit basic_string(const _Tp& __t, const allocator_type& __a); + template _LIBCPP_INLINE_VISIBILITY basic_string(_InputIterator __first, _InputIterator __last); @@ -833,11 +871,10 @@ public: basic_string& operator=(const basic_string& __str); -#ifndef _LIBCPP_CXX03_LANG - template -#endif - _LIBCPP_INLINE_VISIBILITY - basic_string& operator=(__self_view __sv) {return assign(__sv);} + template ::value, 
void>::type> + basic_string& operator=(const _Tp& __t) + {__self_view __sv = __t; return assign(__sv);} + #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_string& operator=(basic_string&& __str) @@ -927,7 +964,15 @@ public: reference at(size_type __n); _LIBCPP_INLINE_VISIBILITY basic_string& operator+=(const basic_string& __str) {return append(__str);} - _LIBCPP_INLINE_VISIBILITY basic_string& operator+=(__self_view __sv) {return append(__sv);} + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + basic_string& + >::type + operator+=(const _Tp& __t) {__self_view __sv = __t; return append(__sv);} _LIBCPP_INLINE_VISIBILITY basic_string& operator+=(const value_type* __s) {return append(__s);} _LIBCPP_INLINE_VISIBILITY basic_string& operator+=(value_type __c) {push_back(__c); return *this;} #ifndef _LIBCPP_CXX03_LANG @@ -936,9 +981,17 @@ public: _LIBCPP_INLINE_VISIBILITY basic_string& append(const basic_string& __str); - _LIBCPP_INLINE_VISIBILITY - basic_string& append(__self_view __sv) { return append(__sv.data(), __sv.size()); } + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + basic_string& + >::type + append(const _Tp& __t) { __self_view __sv = __t; return append(__sv.data(), __sv.size()); } basic_string& append(const basic_string& __str, size_type __pos, size_type __n=npos); + template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS typename enable_if @@ -993,8 +1046,14 @@ public: _LIBCPP_INLINE_VISIBILITY reference back(); _LIBCPP_INLINE_VISIBILITY const_reference back() const; - _LIBCPP_INLINE_VISIBILITY - basic_string& assign(__self_view __sv) { return assign(__sv.data(), __sv.size()); } + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + basic_string& + >::type + assign(const _Tp & __t) { __self_view __sv = __t; return assign(__sv.data(), __sv.size()); } _LIBCPP_INLINE_VISIBILITY basic_string& assign(const basic_string& __str) { return *this = __str; } #ifndef _LIBCPP_CXX03_LANG @@ -1040,8 +1099,17 @@ public: _LIBCPP_INLINE_VISIBILITY basic_string& insert(size_type __pos1, const basic_string& __str); - _LIBCPP_INLINE_VISIBILITY - basic_string& insert(size_type __pos1, __self_view __sv) { return insert(__pos1, __sv.data(), __sv.size()); } + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + basic_string& + >::type + insert(size_type __pos1, const _Tp& __t) + { __self_view __sv = __t; return insert(__pos1, __sv.data(), __sv.size()); } + template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS typename enable_if @@ -1089,8 +1157,15 @@ public: _LIBCPP_INLINE_VISIBILITY basic_string& replace(size_type __pos1, size_type __n1, const basic_string& __str); - _LIBCPP_INLINE_VISIBILITY - basic_string& replace(size_type __pos1, size_type __n1, __self_view __sv) { return replace(__pos1, __n1, __sv.data(), __sv.size()); } + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + basic_string& + >::type + replace(size_type __pos1, size_type __n1, const _Tp& __t) { __self_view __sv = __t; return replace(__pos1, __n1, __sv.data(), __sv.size()); } basic_string& 
replace(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2=npos); template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS @@ -1105,8 +1180,16 @@ public: basic_string& replace(size_type __pos, size_type __n1, size_type __n2, value_type __c); _LIBCPP_INLINE_VISIBILITY basic_string& replace(const_iterator __i1, const_iterator __i2, const basic_string& __str); - _LIBCPP_INLINE_VISIBILITY - basic_string& replace(const_iterator __i1, const_iterator __i2, __self_view __sv) { return replace(__i1 - begin(), __i2 - __i1, __sv); } + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + basic_string& + >::type + replace(const_iterator __i1, const_iterator __i2, const _Tp& __t) { __self_view __sv = __t; return replace(__i1 - begin(), __i2 - __i1, __sv); } + _LIBCPP_INLINE_VISIBILITY basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n); _LIBCPP_INLINE_VISIBILITY @@ -1154,8 +1237,15 @@ public: _LIBCPP_INLINE_VISIBILITY size_type find(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT; - _LIBCPP_INLINE_VISIBILITY - size_type find(__self_view __sv, size_type __pos = 0) const _NOEXCEPT; + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + size_type + >::type + find(const _Tp& __t, size_type __pos = 0) const; size_type find(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY size_type find(const value_type* __s, size_type __pos = 0) const _NOEXCEPT; @@ -1163,8 +1253,15 @@ public: _LIBCPP_INLINE_VISIBILITY size_type rfind(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT; - _LIBCPP_INLINE_VISIBILITY - size_type rfind(__self_view __sv, size_type __pos = npos) const _NOEXCEPT; + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + size_type + >::type + rfind(const _Tp& __t, size_type __pos = npos) const; size_type rfind(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY size_type rfind(const value_type* __s, size_type __pos = npos) const _NOEXCEPT; @@ -1172,8 +1269,15 @@ public: _LIBCPP_INLINE_VISIBILITY size_type find_first_of(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT; - _LIBCPP_INLINE_VISIBILITY - size_type find_first_of(__self_view __sv, size_type __pos = 0) const _NOEXCEPT; + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + size_type + >::type + find_first_of(const _Tp& __t, size_type __pos = 0) const; size_type find_first_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY size_type find_first_of(const value_type* __s, size_type __pos = 0) const _NOEXCEPT; @@ -1182,8 +1286,15 @@ public: _LIBCPP_INLINE_VISIBILITY size_type find_last_of(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT; - _LIBCPP_INLINE_VISIBILITY - size_type find_last_of(__self_view __sv, size_type __pos = npos) const _NOEXCEPT; + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + size_type + >::type + find_last_of(const 
_Tp& __t, size_type __pos = npos) const; size_type find_last_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY size_type find_last_of(const value_type* __s, size_type __pos = npos) const _NOEXCEPT; @@ -1192,8 +1303,15 @@ public: _LIBCPP_INLINE_VISIBILITY size_type find_first_not_of(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT; - _LIBCPP_INLINE_VISIBILITY - size_type find_first_not_of(__self_view __sv, size_type __pos = 0) const _NOEXCEPT; + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + size_type + >::type + find_first_not_of(const _Tp &__t, size_type __pos = 0) const; size_type find_first_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY size_type find_first_not_of(const value_type* __s, size_type __pos = 0) const _NOEXCEPT; @@ -1202,8 +1320,15 @@ public: _LIBCPP_INLINE_VISIBILITY size_type find_last_not_of(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT; - _LIBCPP_INLINE_VISIBILITY - size_type find_last_not_of(__self_view __sv, size_type __pos = npos) const _NOEXCEPT; + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + size_type + >::type + find_last_not_of(const _Tp& __t, size_type __pos = npos) const; size_type find_last_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY size_type find_last_not_of(const value_type* __s, size_type __pos = npos) const _NOEXCEPT; @@ -1212,13 +1337,29 @@ public: _LIBCPP_INLINE_VISIBILITY int compare(const basic_string& __str) const _NOEXCEPT; - _LIBCPP_INLINE_VISIBILITY - int compare(__self_view __sv) const _NOEXCEPT; - _LIBCPP_INLINE_VISIBILITY - int compare(size_type __pos1, size_type __n1, __self_view __sv) const; + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + int + >::type + compare(const _Tp &__t) const; + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS + typename enable_if + < + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + int + >::type + compare(size_type __pos1, size_type __n1, const _Tp& __t) const; + _LIBCPP_INLINE_VISIBILITY int compare(size_type __pos1, size_type __n1, const basic_string& __str) const; int compare(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2=npos) const; + template inline _LIBCPP_INLINE_VISIBILITY typename enable_if @@ -1259,7 +1400,7 @@ public: _LIBCPP_INLINE_VISIBILITY bool __invariants() const; - _LIBCPP_INLINE_VISIBILITY void __clear_and_shrink(); + _LIBCPP_INLINE_VISIBILITY void __clear_and_shrink() _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY bool __is_long() const _NOEXCEPT @@ -1503,6 +1644,23 @@ template basic_string(_InputIterator, _InputIterator, _Allocator = _Allocator()) -> basic_string<_CharT, char_traits<_CharT>, _Allocator>; + +template, + class = typename enable_if<__is_allocator<_Allocator>::value, void>::type + > +explicit basic_string(basic_string_view<_CharT, _Traits>, const _Allocator& = _Allocator()) + -> basic_string<_CharT, _Traits, _Allocator>; + +template, + class = typename enable_if<__is_allocator<_Allocator>::value, void>::type, + class _Sz = typename allocator_traits<_Allocator>::size_type + > 
+basic_string(basic_string_view<_CharT, _Traits>, _Sz, _Sz, const _Allocator& = _Allocator()) + -> basic_string<_CharT, _Traits, _Allocator>; #endif @@ -1623,18 +1781,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_ty } template -inline _LIBCPP_INLINE_VISIBILITY -basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s) -{ - _LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*) detected nullptr"); - __init(__s, traits_type::length(__s)); -#if _LIBCPP_DEBUG_LEVEL >= 2 - __get_db()->__insert_c(this); -#endif -} - -template -inline _LIBCPP_INLINE_VISIBILITY +template basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, const _Allocator& __a) : __r_(__second_tag(), __a) { @@ -1771,7 +1918,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(size_type __n, _CharT __ } template -inline _LIBCPP_INLINE_VISIBILITY +template basic_string<_CharT, _Traits, _Allocator>::basic_string(size_type __n, _CharT __c, const _Allocator& __a) : __r_(__second_tag(), __a) { @@ -1812,13 +1959,13 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st } template -template +template basic_string<_CharT, _Traits, _Allocator>::basic_string( - const _Tp& __t, size_type __pos, size_type __n, const allocator_type& __a, - typename enable_if<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, void>::type *) + const _Tp& __t, size_type __pos, size_type __n, const allocator_type& __a) : __r_(__second_tag(), __a) { - __self_view __sv = __self_view(__t).substr(__pos, __n); + __self_view __sv0 = __t; + __self_view __sv = __sv0.substr(__pos, __n); __init(__sv.data(), __sv.size()); #if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__insert_c(this); @@ -1826,9 +1973,10 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string( } template -inline _LIBCPP_INLINE_VISIBILITY -basic_string<_CharT, _Traits, _Allocator>::basic_string(__self_view __sv) +template +basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t) { + __self_view __sv = __t; __init(__sv.data(), __sv.size()); #if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__insert_c(this); @@ -1836,10 +1984,11 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(__self_view __sv) } template -inline _LIBCPP_INLINE_VISIBILITY -basic_string<_CharT, _Traits, _Allocator>::basic_string(__self_view __sv, const _Allocator& __a) +template +basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t, const _Allocator& __a) : __r_(__second_tag(), __a) { + __self_view __sv = __t; __init(__sv.data(), __sv.size()); #if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__insert_c(this); @@ -3149,11 +3298,16 @@ basic_string<_CharT, _Traits, _Allocator>::find(const basic_string& __str, } template -inline _LIBCPP_INLINE_VISIBILITY -typename basic_string<_CharT, _Traits, _Allocator>::size_type -basic_string<_CharT, _Traits, _Allocator>::find(__self_view __sv, - size_type __pos) const _NOEXCEPT +template +typename enable_if +< + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + typename basic_string<_CharT, _Traits, _Allocator>::size_type +>::type +basic_string<_CharT, _Traits, _Allocator>::find(const _Tp &__t, + size_type __pos) const { + __self_view __sv = __t; return __str_find (data(), size(), __sv.data(), __pos, __sv.size()); } @@ -3202,11 +3356,16 @@ basic_string<_CharT, _Traits, _Allocator>::rfind(const basic_string& __str, } template -inline _LIBCPP_INLINE_VISIBILITY -typename basic_string<_CharT, _Traits, _Allocator>::size_type -basic_string<_CharT, 
_Traits, _Allocator>::rfind(__self_view __sv, - size_type __pos) const _NOEXCEPT +template +typename enable_if +< + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + typename basic_string<_CharT, _Traits, _Allocator>::size_type +>::type +basic_string<_CharT, _Traits, _Allocator>::rfind(const _Tp& __t, + size_type __pos) const { + __self_view __sv = __t; return __str_rfind (data(), size(), __sv.data(), __pos, __sv.size()); } @@ -3255,11 +3414,16 @@ basic_string<_CharT, _Traits, _Allocator>::find_first_of(const basic_string& __s } template -inline _LIBCPP_INLINE_VISIBILITY -typename basic_string<_CharT, _Traits, _Allocator>::size_type -basic_string<_CharT, _Traits, _Allocator>::find_first_of(__self_view __sv, - size_type __pos) const _NOEXCEPT +template +typename enable_if +< + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + typename basic_string<_CharT, _Traits, _Allocator>::size_type +>::type +basic_string<_CharT, _Traits, _Allocator>::find_first_of(const _Tp& __t, + size_type __pos) const { + __self_view __sv = __t; return __str_find_first_of (data(), size(), __sv.data(), __pos, __sv.size()); } @@ -3308,11 +3472,16 @@ basic_string<_CharT, _Traits, _Allocator>::find_last_of(const basic_string& __st } template -inline _LIBCPP_INLINE_VISIBILITY -typename basic_string<_CharT, _Traits, _Allocator>::size_type -basic_string<_CharT, _Traits, _Allocator>::find_last_of(__self_view __sv, - size_type __pos) const _NOEXCEPT +template +typename enable_if +< + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + typename basic_string<_CharT, _Traits, _Allocator>::size_type +>::type +basic_string<_CharT, _Traits, _Allocator>::find_last_of(const _Tp& __t, + size_type __pos) const { + __self_view __sv = __t; return __str_find_last_of (data(), size(), __sv.data(), __pos, __sv.size()); } @@ -3361,11 +3530,16 @@ basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const basic_string& } template -inline _LIBCPP_INLINE_VISIBILITY -typename basic_string<_CharT, _Traits, _Allocator>::size_type -basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(__self_view __sv, - size_type __pos) const _NOEXCEPT +template +typename enable_if +< + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + typename basic_string<_CharT, _Traits, _Allocator>::size_type +>::type +basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const _Tp& __t, + size_type __pos) const { + __self_view __sv = __t; return __str_find_first_not_of (data(), size(), __sv.data(), __pos, __sv.size()); } @@ -3415,11 +3589,16 @@ basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const basic_string& } template -inline _LIBCPP_INLINE_VISIBILITY -typename basic_string<_CharT, _Traits, _Allocator>::size_type -basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(__self_view __sv, - size_type __pos) const _NOEXCEPT +template +typename enable_if +< + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + typename basic_string<_CharT, _Traits, _Allocator>::size_type +>::type +basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const _Tp& __t, + size_type __pos) const { + __self_view __sv = __t; return __str_find_last_not_of (data(), size(), __sv.data(), __pos, __sv.size()); } @@ -3448,10 +3627,15 @@ basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(value_type __c, // compare template -inline _LIBCPP_INLINE_VISIBILITY -int -basic_string<_CharT, _Traits, _Allocator>::compare(__self_view __sv) const _NOEXCEPT +template +typename enable_if +< + 
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + int +>::type +basic_string<_CharT, _Traits, _Allocator>::compare(const _Tp& __t) const { + __self_view __sv = __t; size_t __lhs_sz = size(); size_t __rhs_sz = __sv.size(); int __result = traits_type::compare(data(), __sv.data(), @@ -3497,12 +3681,17 @@ basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1, } template -inline _LIBCPP_INLINE_VISIBILITY -int +template +typename enable_if +< + __can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, + int +>::type basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1, size_type __n1, - __self_view __sv) const + const _Tp& __t) const { + __self_view __sv = __t; return compare(__pos1, __n1, __sv.data(), __sv.size()); } @@ -3585,7 +3774,7 @@ basic_string<_CharT, _Traits, _Allocator>::__invariants() const template inline _LIBCPP_INLINE_VISIBILITY void -basic_string<_CharT, _Traits, _Allocator>::__clear_and_shrink() +basic_string<_CharT, _Traits, _Allocator>::__clear_and_shrink() _NOEXCEPT { clear(); if(__is_long()) diff --git a/include/c++/v1/string_view b/include/c++/v1/string_view index 6377aeb..55dce72 100644 --- a/include/c++/v1/string_view +++ b/include/c++/v1/string_view @@ -178,6 +178,7 @@ namespace std { #include #include #include +#include #include <__debug> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/include/c++/v1/support/android/locale_bionic.h b/include/c++/v1/support/android/locale_bionic.h index 081035d..50fcf5c 100644 --- a/include/c++/v1/support/android/locale_bionic.h +++ b/include/c++/v1/support/android/locale_bionic.h @@ -24,7 +24,45 @@ extern "C" { } #endif +#if defined(__ANDROID__) + +#include +#include #include +// In NDK versions later than 16, locale-aware functions are provided by +// legacy_stdlib_inlines.h +#if __NDK_MAJOR__ <= 16 +#if __ANDROID_API__ < 21 +#include +#elif __ANDROID_API__ < 26 + +#if defined(__cplusplus) +extern "C" { +#endif + +inline _LIBCPP_INLINE_VISIBILITY float strtof_l(const char* __nptr, char** __endptr, + locale_t) { + return ::strtof(__nptr, __endptr); +} + +inline _LIBCPP_INLINE_VISIBILITY double strtod_l(const char* __nptr, + char** __endptr, locale_t) { + return ::strtod(__nptr, __endptr); +} + +inline _LIBCPP_INLINE_VISIBILITY long strtol_l(const char* __nptr, char** __endptr, + int __base, locale_t) { + return ::strtol(__nptr, __endptr, __base); +} + +#if defined(__cplusplus) +} +#endif + +#endif // __ANDROID_API__ < 26 + +#endif // __NDK_MAJOR__ <= 16 +#endif // defined(__ANDROID__) #endif // defined(__BIONIC__) #endif // _LIBCPP_SUPPORT_ANDROID_LOCALE_BIONIC_H diff --git a/include/c++/v1/support/newlib/xlocale.h b/include/c++/v1/support/newlib/xlocale.h index 4b7e9b7..09f9e39 100644 --- a/include/c++/v1/support/newlib/xlocale.h +++ b/include/c++/v1/support/newlib/xlocale.h @@ -19,9 +19,9 @@ #if !defined(__NEWLIB__) || __NEWLIB__ < 2 || \ __NEWLIB__ == 2 && __NEWLIB_MINOR__ < 5 #include -#endif #include #include +#endif #endif // _NEWLIB_VERSION diff --git a/include/c++/v1/support/win32/locale_win32.h b/include/c++/v1/support/win32/locale_win32.h index aebfff2..c7c6d78 100644 --- a/include/c++/v1/support/win32/locale_win32.h +++ b/include/c++/v1/support/win32/locale_win32.h @@ -35,8 +35,8 @@ class locale_t { : __locale(nullptr), __locale_str(nullptr) {} locale_t(std::nullptr_t) : __locale(nullptr), __locale_str(nullptr) {} - locale_t(_locale_t __locale, const char* __locale_str) - : __locale(__locale), __locale_str(__locale_str) {} + locale_t(_locale_t __xlocale, const 
char* __xlocale_str) + : __locale(__xlocale), __locale_str(__xlocale_str) {} friend bool operator==(const locale_t& __left, const locale_t& __right) { return __left.__locale == __right.__locale; @@ -46,6 +46,10 @@ class locale_t { return __left.__locale == nullptr && __right == 0; } + friend bool operator==(const locale_t& __left, long long __right) { + return __left.__locale == nullptr && __right == 0; + } + friend bool operator==(const locale_t& __left, std::nullptr_t) { return __left.__locale == nullptr; } @@ -66,6 +70,10 @@ class locale_t { return !(__left == __right); } + friend bool operator!=(const locale_t& __left, long long __right) { + return !(__left == __right); + } + friend bool operator!=(const locale_t& __left, std::nullptr_t __right) { return !(__left == __right); } diff --git a/include/c++/v1/support/xlocale/__posix_l_fallback.h b/include/c++/v1/support/xlocale/__posix_l_fallback.h index c893a67..b9a0939 100644 --- a/include/c++/v1/support/xlocale/__posix_l_fallback.h +++ b/include/c++/v1/support/xlocale/__posix_l_fallback.h @@ -20,141 +20,141 @@ extern "C" { #endif -inline _LIBCPP_ALWAYS_INLINE int isalnum_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isalnum_l(int c, locale_t) { return ::isalnum(c); } -inline _LIBCPP_ALWAYS_INLINE int isalpha_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isalpha_l(int c, locale_t) { return ::isalpha(c); } -inline _LIBCPP_ALWAYS_INLINE int isblank_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isblank_l(int c, locale_t) { return ::isblank(c); } -inline _LIBCPP_ALWAYS_INLINE int iscntrl_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iscntrl_l(int c, locale_t) { return ::iscntrl(c); } -inline _LIBCPP_ALWAYS_INLINE int isdigit_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isdigit_l(int c, locale_t) { return ::isdigit(c); } -inline _LIBCPP_ALWAYS_INLINE int isgraph_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isgraph_l(int c, locale_t) { return ::isgraph(c); } -inline _LIBCPP_ALWAYS_INLINE int islower_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int islower_l(int c, locale_t) { return ::islower(c); } -inline _LIBCPP_ALWAYS_INLINE int isprint_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isprint_l(int c, locale_t) { return ::isprint(c); } -inline _LIBCPP_ALWAYS_INLINE int ispunct_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int ispunct_l(int c, locale_t) { return ::ispunct(c); } -inline _LIBCPP_ALWAYS_INLINE int isspace_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isspace_l(int c, locale_t) { return ::isspace(c); } -inline _LIBCPP_ALWAYS_INLINE int isupper_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isupper_l(int c, locale_t) { return ::isupper(c); } -inline _LIBCPP_ALWAYS_INLINE int isxdigit_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int isxdigit_l(int c, locale_t) { return ::isxdigit(c); } -inline _LIBCPP_ALWAYS_INLINE int iswalnum_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswalnum_l(wint_t c, locale_t) { return ::iswalnum(c); } -inline _LIBCPP_ALWAYS_INLINE int iswalpha_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswalpha_l(wint_t c, locale_t) { return ::iswalpha(c); } -inline _LIBCPP_ALWAYS_INLINE int iswblank_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswblank_l(wint_t c, locale_t) { return ::iswblank(c); } -inline _LIBCPP_ALWAYS_INLINE int iswcntrl_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswcntrl_l(wint_t c, 
locale_t) { return ::iswcntrl(c); } -inline _LIBCPP_ALWAYS_INLINE int iswdigit_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswdigit_l(wint_t c, locale_t) { return ::iswdigit(c); } -inline _LIBCPP_ALWAYS_INLINE int iswgraph_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswgraph_l(wint_t c, locale_t) { return ::iswgraph(c); } -inline _LIBCPP_ALWAYS_INLINE int iswlower_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswlower_l(wint_t c, locale_t) { return ::iswlower(c); } -inline _LIBCPP_ALWAYS_INLINE int iswprint_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswprint_l(wint_t c, locale_t) { return ::iswprint(c); } -inline _LIBCPP_ALWAYS_INLINE int iswpunct_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswpunct_l(wint_t c, locale_t) { return ::iswpunct(c); } -inline _LIBCPP_ALWAYS_INLINE int iswspace_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswspace_l(wint_t c, locale_t) { return ::iswspace(c); } -inline _LIBCPP_ALWAYS_INLINE int iswupper_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswupper_l(wint_t c, locale_t) { return ::iswupper(c); } -inline _LIBCPP_ALWAYS_INLINE int iswxdigit_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int iswxdigit_l(wint_t c, locale_t) { return ::iswxdigit(c); } -inline _LIBCPP_ALWAYS_INLINE int toupper_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int toupper_l(int c, locale_t) { return ::toupper(c); } -inline _LIBCPP_ALWAYS_INLINE int tolower_l(int c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int tolower_l(int c, locale_t) { return ::tolower(c); } -inline _LIBCPP_ALWAYS_INLINE wint_t towupper_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY wint_t towupper_l(wint_t c, locale_t) { return ::towupper(c); } -inline _LIBCPP_ALWAYS_INLINE wint_t towlower_l(wint_t c, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY wint_t towlower_l(wint_t c, locale_t) { return ::towlower(c); } -inline _LIBCPP_ALWAYS_INLINE int strcoll_l(const char *s1, const char *s2, - locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int strcoll_l(const char *s1, const char *s2, + locale_t) { return ::strcoll(s1, s2); } -inline _LIBCPP_ALWAYS_INLINE size_t strxfrm_l(char *dest, const char *src, - size_t n, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY size_t strxfrm_l(char *dest, const char *src, + size_t n, locale_t) { return ::strxfrm(dest, src, n); } -inline _LIBCPP_ALWAYS_INLINE size_t strftime_l(char *s, size_t max, - const char *format, - const struct tm *tm, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY size_t strftime_l(char *s, size_t max, + const char *format, + const struct tm *tm, locale_t) { return ::strftime(s, max, format, tm); } -inline _LIBCPP_ALWAYS_INLINE int wcscoll_l(const wchar_t *ws1, - const wchar_t *ws2, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY int wcscoll_l(const wchar_t *ws1, + const wchar_t *ws2, locale_t) { return ::wcscoll(ws1, ws2); } -inline _LIBCPP_ALWAYS_INLINE size_t wcsxfrm_l(wchar_t *dest, const wchar_t *src, - size_t n, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY size_t wcsxfrm_l(wchar_t *dest, const wchar_t *src, + size_t n, locale_t) { return ::wcsxfrm(dest, src, n); } diff --git a/include/c++/v1/support/xlocale/__strtonum_fallback.h b/include/c++/v1/support/xlocale/__strtonum_fallback.h index 4ae3918..50b4db3 100644 --- a/include/c++/v1/support/xlocale/__strtonum_fallback.h +++ b/include/c++/v1/support/xlocale/__strtonum_fallback.h @@ -20,43 +20,43 @@ extern "C" { #endif -inline _LIBCPP_ALWAYS_INLINE float 
strtof_l(const char *nptr, - char **endptr, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY float strtof_l(const char *nptr, + char **endptr, locale_t) { return ::strtof(nptr, endptr); } -inline _LIBCPP_ALWAYS_INLINE double strtod_l(const char *nptr, - char **endptr, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY double strtod_l(const char *nptr, + char **endptr, locale_t) { return ::strtod(nptr, endptr); } -inline _LIBCPP_ALWAYS_INLINE long double strtold_l(const char *nptr, - char **endptr, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY long double strtold_l(const char *nptr, + char **endptr, locale_t) { return ::strtold(nptr, endptr); } -inline _LIBCPP_ALWAYS_INLINE long long +inline _LIBCPP_INLINE_VISIBILITY long long strtoll_l(const char *nptr, char **endptr, int base, locale_t) { return ::strtoll(nptr, endptr, base); } -inline _LIBCPP_ALWAYS_INLINE unsigned long long +inline _LIBCPP_INLINE_VISIBILITY unsigned long long strtoull_l(const char *nptr, char **endptr, int base, locale_t) { return ::strtoull(nptr, endptr, base); } -inline _LIBCPP_ALWAYS_INLINE long long +inline _LIBCPP_INLINE_VISIBILITY long long wcstoll_l(const wchar_t *nptr, wchar_t **endptr, int base, locale_t) { return ::wcstoll(nptr, endptr, base); } -inline _LIBCPP_ALWAYS_INLINE unsigned long long +inline _LIBCPP_INLINE_VISIBILITY unsigned long long wcstoull_l(const wchar_t *nptr, wchar_t **endptr, int base, locale_t) { return ::wcstoull(nptr, endptr, base); } -inline _LIBCPP_ALWAYS_INLINE long double wcstold_l(const wchar_t *nptr, - wchar_t **endptr, locale_t) { +inline _LIBCPP_INLINE_VISIBILITY long double wcstold_l(const wchar_t *nptr, + wchar_t **endptr, locale_t) { return ::wcstold(nptr, endptr); } diff --git a/include/c++/v1/system_error b/include/c++/v1/system_error index c577edc..6e2c838 100644 --- a/include/c++/v1/system_error +++ b/include/c++/v1/system_error @@ -120,88 +120,6 @@ public: const char* what() const noexcept; }; -enum class errc -{ - address_family_not_supported, // EAFNOSUPPORT - address_in_use, // EADDRINUSE - address_not_available, // EADDRNOTAVAIL - already_connected, // EISCONN - argument_list_too_long, // E2BIG - argument_out_of_domain, // EDOM - bad_address, // EFAULT - bad_file_descriptor, // EBADF - bad_message, // EBADMSG - broken_pipe, // EPIPE - connection_aborted, // ECONNABORTED - connection_already_in_progress, // EALREADY - connection_refused, // ECONNREFUSED - connection_reset, // ECONNRESET - cross_device_link, // EXDEV - destination_address_required, // EDESTADDRREQ - device_or_resource_busy, // EBUSY - directory_not_empty, // ENOTEMPTY - executable_format_error, // ENOEXEC - file_exists, // EEXIST - file_too_large, // EFBIG - filename_too_long, // ENAMETOOLONG - function_not_supported, // ENOSYS - host_unreachable, // EHOSTUNREACH - identifier_removed, // EIDRM - illegal_byte_sequence, // EILSEQ - inappropriate_io_control_operation, // ENOTTY - interrupted, // EINTR - invalid_argument, // EINVAL - invalid_seek, // ESPIPE - io_error, // EIO - is_a_directory, // EISDIR - message_size, // EMSGSIZE - network_down, // ENETDOWN - network_reset, // ENETRESET - network_unreachable, // ENETUNREACH - no_buffer_space, // ENOBUFS - no_child_process, // ECHILD - no_link, // ENOLINK - no_lock_available, // ENOLCK - no_message_available, // ENODATA - no_message, // ENOMSG - no_protocol_option, // ENOPROTOOPT - no_space_on_device, // ENOSPC - no_stream_resources, // ENOSR - no_such_device_or_address, // ENXIO - no_such_device, // ENODEV - no_such_file_or_directory, // ENOENT - no_such_process, 
// ESRCH - not_a_directory, // ENOTDIR - not_a_socket, // ENOTSOCK - not_a_stream, // ENOSTR - not_connected, // ENOTCONN - not_enough_memory, // ENOMEM - not_supported, // ENOTSUP - operation_canceled, // ECANCELED - operation_in_progress, // EINPROGRESS - operation_not_permitted, // EPERM - operation_not_supported, // EOPNOTSUPP - operation_would_block, // EWOULDBLOCK - owner_dead, // EOWNERDEAD - permission_denied, // EACCES - protocol_error, // EPROTO - protocol_not_supported, // EPROTONOSUPPORT - read_only_file_system, // EROFS - resource_deadlock_would_occur, // EDEADLK - resource_unavailable_try_again, // EAGAIN - result_out_of_range, // ERANGE - state_not_recoverable, // ENOTRECOVERABLE - stream_timeout, // ETIME - text_file_busy, // ETXTBSY - timed_out, // ETIMEDOUT - too_many_files_open_in_system, // ENFILE - too_many_files_open, // EMFILE - too_many_links, // EMLINK - too_many_symbolic_link_levels, // ELOOP - value_too_large, // EOVERFLOW - wrong_protocol_type // EPROTOTYPE -}; - template <> struct is_error_condition_enum : true_type { } @@ -225,8 +143,7 @@ template <> struct hash; */ -#include <__config> -#include +#include <__errc> #include #include #include <__functional_base> @@ -260,109 +177,6 @@ template _LIBCPP_INLINE_VAR constexpr size_t is_error_condition_enum_v = is_error_condition_enum<_Tp>::value; #endif -// Some error codes are not present on all platforms, so we provide equivalents -// for them: - -//enum class errc -_LIBCPP_DECLARE_STRONG_ENUM(errc) -{ - address_family_not_supported = EAFNOSUPPORT, - address_in_use = EADDRINUSE, - address_not_available = EADDRNOTAVAIL, - already_connected = EISCONN, - argument_list_too_long = E2BIG, - argument_out_of_domain = EDOM, - bad_address = EFAULT, - bad_file_descriptor = EBADF, - bad_message = EBADMSG, - broken_pipe = EPIPE, - connection_aborted = ECONNABORTED, - connection_already_in_progress = EALREADY, - connection_refused = ECONNREFUSED, - connection_reset = ECONNRESET, - cross_device_link = EXDEV, - destination_address_required = EDESTADDRREQ, - device_or_resource_busy = EBUSY, - directory_not_empty = ENOTEMPTY, - executable_format_error = ENOEXEC, - file_exists = EEXIST, - file_too_large = EFBIG, - filename_too_long = ENAMETOOLONG, - function_not_supported = ENOSYS, - host_unreachable = EHOSTUNREACH, - identifier_removed = EIDRM, - illegal_byte_sequence = EILSEQ, - inappropriate_io_control_operation = ENOTTY, - interrupted = EINTR, - invalid_argument = EINVAL, - invalid_seek = ESPIPE, - io_error = EIO, - is_a_directory = EISDIR, - message_size = EMSGSIZE, - network_down = ENETDOWN, - network_reset = ENETRESET, - network_unreachable = ENETUNREACH, - no_buffer_space = ENOBUFS, - no_child_process = ECHILD, - no_link = ENOLINK, - no_lock_available = ENOLCK, -#ifdef ENODATA - no_message_available = ENODATA, -#else - no_message_available = ENOMSG, -#endif - no_message = ENOMSG, - no_protocol_option = ENOPROTOOPT, - no_space_on_device = ENOSPC, -#ifdef ENOSR - no_stream_resources = ENOSR, -#else - no_stream_resources = ENOMEM, -#endif - no_such_device_or_address = ENXIO, - no_such_device = ENODEV, - no_such_file_or_directory = ENOENT, - no_such_process = ESRCH, - not_a_directory = ENOTDIR, - not_a_socket = ENOTSOCK, -#ifdef ENOSTR - not_a_stream = ENOSTR, -#else - not_a_stream = EINVAL, -#endif - not_connected = ENOTCONN, - not_enough_memory = ENOMEM, - not_supported = ENOTSUP, - operation_canceled = ECANCELED, - operation_in_progress = EINPROGRESS, - operation_not_permitted = EPERM, - operation_not_supported = EOPNOTSUPP, 
- operation_would_block = EWOULDBLOCK, - owner_dead = EOWNERDEAD, - permission_denied = EACCES, - protocol_error = EPROTO, - protocol_not_supported = EPROTONOSUPPORT, - read_only_file_system = EROFS, - resource_deadlock_would_occur = EDEADLK, - resource_unavailable_try_again = EAGAIN, - result_out_of_range = ERANGE, - state_not_recoverable = ENOTRECOVERABLE, -#ifdef ETIME - stream_timeout = ETIME, -#else - stream_timeout = ETIMEDOUT, -#endif - text_file_busy = ETXTBSY, - timed_out = ETIMEDOUT, - too_many_files_open_in_system = ENFILE, - too_many_files_open = EMFILE, - too_many_links = EMLINK, - too_many_symbolic_link_levels = ELOOP, - value_too_large = EOVERFLOW, - wrong_protocol_type = EPROTOTYPE -}; -_LIBCPP_DECLARE_STRONG_ENUM_EPILOG(errc) - template <> struct _LIBCPP_TEMPLATE_VIS is_error_condition_enum : true_type { }; @@ -385,11 +199,11 @@ class _LIBCPP_TYPE_VIS error_category public: virtual ~error_category() _NOEXCEPT; -#if defined(_LIBCPP_BUILDING_SYSTEM_ERROR) && \ +#if defined(_LIBCPP_BUILDING_LIBRARY) && \ defined(_LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS) error_category() _NOEXCEPT; #else - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 error_category() _NOEXCEPT _LIBCPP_DEFAULT #endif private: @@ -403,13 +217,13 @@ public: virtual bool equivalent(const error_code& __code, int __condition) const _NOEXCEPT; virtual string message(int __ev) const = 0; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool operator==(const error_category& __rhs) const _NOEXCEPT {return this == &__rhs;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool operator!=(const error_category& __rhs) const _NOEXCEPT {return !(*this == __rhs);} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY bool operator< (const error_category& __rhs) const _NOEXCEPT {return this < &__rhs;} friend class _LIBCPP_HIDDEN __do_message; @@ -430,21 +244,21 @@ class _LIBCPP_TYPE_VIS error_condition int __val_; const error_category* __cat_; public: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY error_condition() _NOEXCEPT : __val_(0), __cat_(&generic_category()) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY error_condition(int __val, const error_category& __cat) _NOEXCEPT : __val_(__val), __cat_(&__cat) {} template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY error_condition(_Ep __e, typename enable_if::value>::type* = 0 ) _NOEXCEPT {*this = make_error_condition(__e);} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void assign(int __val, const error_category& __cat) _NOEXCEPT { __val_ = __val; @@ -452,7 +266,7 @@ public: } template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY typename enable_if < is_error_condition_enum<_Ep>::value, @@ -461,21 +275,21 @@ public: operator=(_Ep __e) _NOEXCEPT {*this = make_error_condition(__e); return *this;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void clear() _NOEXCEPT { __val_ = 0; __cat_ = &generic_category(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int value() const _NOEXCEPT {return __val_;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const error_category& category() const _NOEXCEPT {return *__cat_;} string message() const; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_EXPLICIT operator bool() const _NOEXCEPT {return __val_ != 0;} }; @@ -502,21 +316,21 @@ class _LIBCPP_TYPE_VIS error_code int __val_; const error_category* __cat_; public: - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY error_code() _NOEXCEPT : __val_(0), 
__cat_(&system_category()) {} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY error_code(int __val, const error_category& __cat) _NOEXCEPT : __val_(__val), __cat_(&__cat) {} template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY error_code(_Ep __e, typename enable_if::value>::type* = 0 ) _NOEXCEPT {*this = make_error_code(__e);} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void assign(int __val, const error_category& __cat) _NOEXCEPT { __val_ = __val; @@ -524,7 +338,7 @@ public: } template - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY typename enable_if < is_error_code_enum<_Ep>::value, @@ -533,26 +347,26 @@ public: operator=(_Ep __e) _NOEXCEPT {*this = make_error_code(__e); return *this;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY void clear() _NOEXCEPT { __val_ = 0; __cat_ = &system_category(); } - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY int value() const _NOEXCEPT {return __val_;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const error_category& category() const _NOEXCEPT {return *__cat_;} - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY error_condition default_error_condition() const _NOEXCEPT {return __cat_->default_error_condition(__val_);} string message() const; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY _LIBCPP_EXPLICIT operator bool() const _NOEXCEPT {return __val_ != 0;} }; @@ -658,7 +472,7 @@ public: system_error(int __ev, const error_category& __ecat); ~system_error() _NOEXCEPT; - _LIBCPP_ALWAYS_INLINE + _LIBCPP_INLINE_VISIBILITY const error_code& code() const _NOEXCEPT {return __ec_;} private: diff --git a/include/c++/v1/tuple b/include/c++/v1/tuple index b3a17e7..fb5428e 100644 --- a/include/c++/v1/tuple +++ b/include/c++/v1/tuple @@ -141,6 +141,7 @@ template #include #include <__functional_base> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/include/c++/v1/type_traits b/include/c++/v1/type_traits index 5cb3b5c..7ff67bf 100644 --- a/include/c++/v1/type_traits +++ b/include/c++/v1/type_traits @@ -21,7 +21,7 @@ namespace std template struct integral_constant; typedef integral_constant true_type; // C++11 typedef integral_constant false_type; // C++11 - + template // C++14 using bool_constant = integral_constant; // C++14 typedef bool_constant true_type; // C++14 @@ -75,6 +75,10 @@ namespace std template struct remove_pointer; template struct add_pointer; + template struct type_identity; // C++20 + template + using type_identity_t = typename type_identity::type; // C++20 + // Integral properties: template struct is_signed; template struct is_unsigned; @@ -172,7 +176,7 @@ namespace std using add_volatile_t = typename add_volatile::type; // C++14 template using add_cv_t = typename add_cv::type; // C++14 - + // reference modifications: template using remove_reference_t = typename remove_reference::type; // C++14 @@ -180,13 +184,13 @@ namespace std using add_lvalue_reference_t = typename add_lvalue_reference::type; // C++14 template using add_rvalue_reference_t = typename add_rvalue_reference::type; // C++14 - + // sign modifications: template using make_signed_t = typename make_signed::type; // C++14 template using make_unsigned_t = typename make_unsigned::type; // C++14 - + // array modifications: template using remove_extent_t = typename remove_extent::type; // C++14 @@ -223,7 +227,7 @@ namespace std template using void_t = void; // C++17 - + // See C++14 20.10.4.1, primary type categories template inline constexpr bool is_void_v = is_void::value; // C++17 
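
Aside on the type_traits hunk above: it adds std::type_identity / type_identity_t for C++20 (the definitions appear later in this diff under _LIBCPP_STD_VER > 17). A minimal usage sketch, assuming a C++20 standard library; clamp_to is a hypothetical helper, not code from the diff:

```cpp
#include <cstdio>
#include <type_traits>

// type_identity_t puts the second parameter in a non-deduced context,
// so the template argument is deduced from the first argument only.
template <class T>
T clamp_to(T value, std::type_identity_t<T> upper) {
  return value < upper ? value : upper;
}

int main() {
  // Without type_identity_t this call would be ambiguous (T deduced as
  // both int and double); here T = int and 2.5 converts to 2.
  std::printf("%d\n", clamp_to(3, 2.5));  // prints 2
  return 0;
}
```
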
@@ -386,13 +390,13 @@ namespace std // [meta.logical], logical operator traits: template struct conjunction; // C++17 - template + template inline constexpr bool conjunction_v = conjunction::value; // C++17 template struct disjunction; // C++17 template inline constexpr bool disjunction_v = disjunction::value; // C++17 template struct negation; // C++17 - template + template inline constexpr bool negation_v = negation::value; // C++17 } @@ -400,6 +404,7 @@ namespace std */ #include <__config> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -595,7 +600,7 @@ template struct __and_<_B0, _B1> : conditional<_B0::value, _B1, _B0>::type {}; template -struct __and_<_B0, _B1, _B2, _Bn...> +struct __and_<_B0, _B1, _B2, _Bn...> : conditional<_B0::value, __and_<_B1, _B2, _Bn...>, _B0>::type {}; // __or_ @@ -608,11 +613,11 @@ template struct __or_<_B0, _B1> : conditional<_B0::value, _B0, _B1>::type {}; template -struct __or_<_B0, _B1, _B2, _Bn...> +struct __or_<_B0, _B1, _B2, _Bn...> : conditional<_B0::value, _B0, __or_<_B1, _B2, _Bn...> >::type {}; // __not_ -template +template struct __not_ : conditional<_Tp::value, false_type, true_type>::type {}; #endif // !defined(_LIBCPP_CXX03_LANG) @@ -733,6 +738,12 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_integral_v // is_floating_point template struct __libcpp_is_floating_point : public false_type {}; +#ifdef __clang__ +template <> struct __libcpp_is_floating_point<__fp16> : public true_type {}; +#endif +#ifdef __FLT16_MANT_DIG__ +template <> struct __libcpp_is_floating_point<_Float16> : public true_type {}; +#endif template <> struct __libcpp_is_floating_point : public true_type {}; template <> struct __libcpp_is_floating_point : public true_type {}; template <> struct __libcpp_is_floating_point : public true_type {}; @@ -897,7 +908,7 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_function_v // template struct __libcpp_is_member_function_pointer : public false_type {}; // template struct __libcpp_is_member_function_pointer<_Tp _Up::*> : public is_function<_Tp> {}; -// +// template struct __member_pointer_traits_imp @@ -1178,7 +1189,7 @@ struct __is_same_uncvref : is_same::type, typename __uncvref<_Up>::type> {}; #if _LIBCPP_STD_VER > 17 -// aligned_union - same as __uncvref +// remove_cvref - same as __uncvref template struct remove_cvref : public __uncvref<_Tp> {}; @@ -1205,12 +1216,12 @@ template using remove_pointer_t = typename remove_pointer<_Tp>::type // add_pointer -template ::value || +template ::value || is_same::type, void>::value> struct __add_pointer_impl {typedef typename remove_reference<_Tp>::type* type;}; -template struct __add_pointer_impl<_Tp, false> +template struct __add_pointer_impl<_Tp, false> {typedef _Tp type;}; template struct _LIBCPP_TEMPLATE_VIS add_pointer @@ -1220,6 +1231,12 @@ template struct _LIBCPP_TEMPLATE_VIS add_pointer template using add_pointer_t = typename add_pointer<_Tp>::type; #endif +// type_identity +#if _LIBCPP_STD_VER > 17 +template struct type_identity { typedef _Tp type; }; +template using type_identity_t = typename type_identity<_Tp>::type; +#endif + // is_signed template ::value> @@ -1624,7 +1641,7 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool has_virtual_destructor_v #if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS) template struct _LIBCPP_TEMPLATE_VIS has_unique_object_representations - : public integral_constant>)> {}; #if !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) @@ -2235,7 +2252,7 @@ struct __is_destructor_wellformed { 
template static __two __test (...); - + static const bool value = sizeof(__test<_Tp>(12)) == sizeof(char); }; @@ -2243,8 +2260,8 @@ template struct __destructible_imp; template -struct __destructible_imp<_Tp, false> - : public _VSTD::integral_constant + : public _VSTD::integral_constant::type>::value> {}; template @@ -3395,7 +3412,7 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_default_constructible_v template struct _LIBCPP_TEMPLATE_VIS is_copy_constructible - : public is_constructible<_Tp, + : public is_constructible<_Tp, typename add_lvalue_reference::type>::type> {}; #if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) @@ -4103,7 +4120,7 @@ template struct _LIBCPP_TEMPLATE_VIS is_literal_type is_reference::type>::value> #endif {}; - + #if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) template _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_literal_type_v @@ -4119,7 +4136,7 @@ template struct _LIBCPP_TEMPLATE_VIS is_standard_layout : integral_constant::type>::value> #endif {}; - + #if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) template _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_standard_layout_v @@ -4137,7 +4154,7 @@ template struct _LIBCPP_TEMPLATE_VIS is_trivially_copyable : integral_constant::type>::value> #endif {}; - + #if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) template _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_copyable_v @@ -4168,147 +4185,6 @@ template struct __is_reference_wrapper #ifndef _LIBCPP_CXX03_LANG -// Check for complete types - -template struct __check_complete; - -template <> -struct __check_complete<> -{ -}; - -template -struct __check_complete<_Hp, _T0, _Tp...> - : private __check_complete<_Hp>, - private __check_complete<_T0, _Tp...> -{ -}; - -template -struct __check_complete<_Hp, _Hp> - : private __check_complete<_Hp> -{ -}; - -template -struct __check_complete<_Tp> -{ - static_assert(sizeof(_Tp) > 0, "Type must be complete."); -}; - -template -struct __check_complete<_Tp&> - : private __check_complete<_Tp> -{ -}; - -template -struct __check_complete<_Tp&&> - : private __check_complete<_Tp> -{ -}; - -template -struct __check_complete<_Rp (*)(_Param...)> - : private __check_complete<_Rp> -{ -}; - -template -struct __check_complete -{ -}; - -template -struct __check_complete<_Rp (_Param...)> - : private __check_complete<_Rp> -{ -}; - -template -struct __check_complete -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...)> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) const> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) volatile> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) const volatile> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) &> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) const&> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) volatile&> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) const volatile&> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) 
&&> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) const&&> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) volatile&&> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp (_Class::*)(_Param...) const volatile&&> - : private __check_complete<_Class> -{ -}; - -template -struct __check_complete<_Rp _Class::*> - : private __check_complete<_Class> -{ -}; - - template ::type, class _DecayA0 = typename decay<_A0>::type, @@ -4491,8 +4367,9 @@ _LIBCPP_INVOKE_RETURN(_VSTD::forward<_Fp>(__f)(_VSTD::forward<_Args>(__args)...) template struct __invokable_r - : private __check_complete<_Fp> { + // FIXME: Check that _Ret, _Fp, and _Args... are all complete types, cv void, + // or incomplete array types as required by the standard. using _Result = decltype( _VSTD::__invoke(_VSTD::declval<_Fp>(), _VSTD::declval<_Args>()...)); @@ -4882,7 +4759,6 @@ struct __has_operator_addressof #if _LIBCPP_STD_VER > 14 -#define __cpp_lib_void_t 201411 template using void_t = void; # ifndef _LIBCPP_HAS_NO_VARIADICS @@ -4964,7 +4840,7 @@ template constexpr typename enable_if, byte>::type & operator<<=(byte& __lhs, _Integer __shift) noexcept { return __lhs = __lhs << __shift; } - + template constexpr typename enable_if, byte>::type operator<< (byte __lhs, _Integer __shift) noexcept @@ -4979,7 +4855,7 @@ template constexpr typename enable_if, byte>::type operator>> (byte __lhs, _Integer __shift) noexcept { return static_cast(static_cast(static_cast(__lhs) >> __shift)); } - + template constexpr typename enable_if, _Integer>::type to_integer(byte __b) noexcept { return static_cast<_Integer>(__b); } diff --git a/include/c++/v1/typeinfo b/include/c++/v1/typeinfo index 74d94e9..92f1e22 100644 --- a/include/c++/v1/typeinfo +++ b/include/c++/v1/typeinfo @@ -226,13 +226,13 @@ public: #endif // defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME) _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY void __throw_bad_cast() { #ifndef _LIBCPP_NO_EXCEPTIONS throw bad_cast(); #else - _VSTD::abort(); + _VSTD::abort(); #endif } _LIBCPP_END_NAMESPACE_STD diff --git a/include/c++/v1/unordered_map b/include/c++/v1/unordered_map index 725cb6a..fc3cfb6 100644 --- a/include/c++/v1/unordered_map +++ b/include/c++/v1/unordered_map @@ -44,6 +44,9 @@ public: typedef /unspecified/ local_iterator; typedef /unspecified/ const_local_iterator; + typedef unspecified node_type; // C++17 + typedef INSERT_RETURN_TYPE insert_return_type; // C++17 + unordered_map() noexcept( is_nothrow_default_constructible::value && @@ -122,6 +125,11 @@ public: void insert(InputIterator first, InputIterator last); void insert(initializer_list); + node_type extract(const_iterator position); // C++17 + node_type extract(const key_type& x); // C++17 + insert_return_type insert(node_type&& nh); // C++17 + iterator insert(const_iterator hint, node_type&& nh); // C++17 + template pair try_emplace(const key_type& k, Args&&... 
args); // C++17 template @@ -226,6 +234,8 @@ public: typedef /unspecified/ local_iterator; typedef /unspecified/ const_local_iterator; + typedef unspecified node_type; // C++17 + unordered_multimap() noexcept( is_nothrow_default_constructible::value && @@ -304,6 +314,11 @@ public: void insert(InputIterator first, InputIterator last); void insert(initializer_list); + node_type extract(const_iterator position); // C++17 + node_type extract(const key_type& x); // C++17 + iterator insert(node_type&& nh); // C++17 + iterator insert(const_iterator hint, node_type&& nh); // C++17 + iterator erase(const_iterator position); iterator erase(iterator position); // C++14 size_type erase(const key_type& k); @@ -367,9 +382,11 @@ template #include <__config> #include <__hash_table> +#include <__node_handle> #include #include #include +#include #include <__debug> @@ -396,7 +413,7 @@ public: const _Hash& hash_function() const _NOEXCEPT {return *this;} _LIBCPP_INLINE_VISIBILITY size_t operator()(const _Cp& __x) const - {return static_cast(*this)(__x.__cc.first);} + {return static_cast(*this)(__x.__get_value().first);} _LIBCPP_INLINE_VISIBILITY size_t operator()(const _Key& __x) const {return static_cast(*this)(__x);} @@ -425,7 +442,7 @@ public: const _Hash& hash_function() const _NOEXCEPT {return __hash_;} _LIBCPP_INLINE_VISIBILITY size_t operator()(const _Cp& __x) const - {return __hash_(__x.__cc.first);} + {return __hash_(__x.__get_value().first);} _LIBCPP_INLINE_VISIBILITY size_t operator()(const _Key& __x) const {return __hash_(__x);} @@ -464,13 +481,13 @@ public: const _Pred& key_eq() const _NOEXCEPT {return *this;} _LIBCPP_INLINE_VISIBILITY bool operator()(const _Cp& __x, const _Cp& __y) const - {return static_cast(*this)(__x.__cc.first, __y.__cc.first);} + {return static_cast(*this)(__x.__get_value().first, __y.__get_value().first);} _LIBCPP_INLINE_VISIBILITY bool operator()(const _Cp& __x, const _Key& __y) const - {return static_cast(*this)(__x.__cc.first, __y);} + {return static_cast(*this)(__x.__get_value().first, __y);} _LIBCPP_INLINE_VISIBILITY bool operator()(const _Key& __x, const _Cp& __y) const - {return static_cast(*this)(__x, __y.__cc.first);} + {return static_cast(*this)(__x, __y.__get_value().first);} void swap(__unordered_map_equal&__y) _NOEXCEPT_(__is_nothrow_swappable<_Pred>::value) { @@ -496,13 +513,13 @@ public: const _Pred& key_eq() const _NOEXCEPT {return __pred_;} _LIBCPP_INLINE_VISIBILITY bool operator()(const _Cp& __x, const _Cp& __y) const - {return __pred_(__x.__cc.first, __y.__cc.first);} + {return __pred_(__x.__get_value().first, __y.__get_value().first);} _LIBCPP_INLINE_VISIBILITY bool operator()(const _Cp& __x, const _Key& __y) const - {return __pred_(__x.__cc.first, __y);} + {return __pred_(__x.__get_value().first, __y);} _LIBCPP_INLINE_VISIBILITY bool operator()(const _Key& __x, const _Cp& __y) const - {return __pred_(__x, __y.__cc.first);} + {return __pred_(__x, __y.__get_value().first);} void swap(__unordered_map_equal&__y) _NOEXCEPT_(__is_nothrow_swappable<_Pred>::value) { @@ -572,9 +589,9 @@ public: void operator()(pointer __p) _NOEXCEPT { if (__second_constructed) - __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__cc.second)); + __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__get_value().second)); if (__first_constructed) - __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__cc.first)); + __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__get_value().first)); if (__p) __alloc_traits::deallocate(__na_, __p, 1); 
} @@ -582,23 +599,67 @@ public: #ifndef _LIBCPP_CXX03_LANG template -union __hash_value_type +struct __hash_value_type { typedef _Key key_type; typedef _Tp mapped_type; typedef pair value_type; - typedef pair __nc_value_type; + typedef pair __nc_ref_pair_type; + typedef pair __nc_rref_pair_type; +private: value_type __cc; - __nc_value_type __nc; + +public: + _LIBCPP_INLINE_VISIBILITY + value_type& __get_value() + { +#if _LIBCPP_STD_VER > 14 + return *_VSTD::launder(_VSTD::addressof(__cc)); +#else + return __cc; +#endif + } + + _LIBCPP_INLINE_VISIBILITY + const value_type& __get_value() const + { +#if _LIBCPP_STD_VER > 14 + return *_VSTD::launder(_VSTD::addressof(__cc)); +#else + return __cc; +#endif + } + + _LIBCPP_INLINE_VISIBILITY + __nc_ref_pair_type __ref() + { + value_type& __v = __get_value(); + return __nc_ref_pair_type(const_cast(__v.first), __v.second); + } + + _LIBCPP_INLINE_VISIBILITY + __nc_rref_pair_type __move() + { + value_type& __v = __get_value(); + return __nc_rref_pair_type( + _VSTD::move(const_cast(__v.first)), + _VSTD::move(__v.second)); + } _LIBCPP_INLINE_VISIBILITY __hash_value_type& operator=(const __hash_value_type& __v) - {__nc = __v.__cc; return *this;} + { + __ref() = __v.__get_value(); + return *this; + } _LIBCPP_INLINE_VISIBILITY __hash_value_type& operator=(__hash_value_type&& __v) - {__nc = _VSTD::move(__v.__nc); return *this;} + { + __ref() = __v.__move(); + return *this; + } template ::type > _LIBCPP_INLINE_VISIBILITY - __hash_value_type& operator=(_ValueTp&& __v) { - __nc = _VSTD::forward<_ValueTp>(__v); return *this; + __hash_value_type& operator=(_ValueTp&& __v) + { + __ref() = _VSTD::forward<_ValueTp>(__v); + return *this; } private: @@ -628,8 +691,15 @@ struct __hash_value_type typedef _Tp mapped_type; typedef pair value_type; +private: value_type __cc; +public: + _LIBCPP_INLINE_VISIBILITY + value_type& __get_value() { return __cc; } + _LIBCPP_INLINE_VISIBILITY + const value_type& __get_value() const { return __cc; } + private: ~__hash_value_type(); }; @@ -657,9 +727,9 @@ public: __hash_map_iterator(_HashIterator __i) _NOEXCEPT : __i_(__i) {} _LIBCPP_INLINE_VISIBILITY - reference operator*() const {return __i_->__cc;} + reference operator*() const {return __i_->__get_value();} _LIBCPP_INLINE_VISIBILITY - pointer operator->() const {return pointer_traits::pointer_to(__i_->__cc);} + pointer operator->() const {return pointer_traits::pointer_to(__i_->__get_value());} _LIBCPP_INLINE_VISIBILITY __hash_map_iterator& operator++() {++__i_; return *this;} @@ -711,9 +781,9 @@ public: : __i_(__i.__i_) {} _LIBCPP_INLINE_VISIBILITY - reference operator*() const {return __i_->__cc;} + reference operator*() const {return __i_->__get_value();} _LIBCPP_INLINE_VISIBILITY - pointer operator->() const {return pointer_traits::pointer_to(__i_->__cc);} + pointer operator->() const {return pointer_traits::pointer_to(__i_->__get_value());} _LIBCPP_INLINE_VISIBILITY __hash_map_const_iterator& operator++() {++__i_; return *this;} @@ -750,7 +820,6 @@ public: typedef _Pred key_equal; typedef _Alloc allocator_type; typedef pair value_type; - typedef pair __nc_value_type; typedef value_type& reference; typedef const value_type& const_reference; static_assert((is_same::value), @@ -791,6 +860,11 @@ public: typedef __hash_map_iterator local_iterator; typedef __hash_map_const_iterator const_local_iterator; +#if _LIBCPP_STD_VER > 14 + typedef __map_node_handle<__node, allocator_type> node_type; + typedef __insert_return_type insert_return_type; +#endif + _LIBCPP_INLINE_VISIBILITY 
unordered_map() _NOEXCEPT_(is_nothrow_default_constructible<__table>::value) @@ -1084,7 +1158,37 @@ public: iterator erase(const_iterator __first, const_iterator __last) {return __table_.erase(__first.__i_, __last.__i_);} _LIBCPP_INLINE_VISIBILITY - void clear() _NOEXCEPT {__table_.clear();} + void clear() _NOEXCEPT {__table_.clear();} + +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY + insert_return_type insert(node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to unordered_map::insert()"); + return __table_.template __node_handle_insert_unique< + node_type, insert_return_type>(_VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + iterator insert(const_iterator __hint, node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to unordered_map::insert()"); + return __table_.template __node_handle_insert_unique( + __hint.__i_, _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(key_type const& __key) + { + return __table_.template __node_handle_extract(__key); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(const_iterator __it) + { + return __table_.template __node_handle_extract( + __it.__i_); + } +#endif _LIBCPP_INLINE_VISIBILITY void swap(unordered_map& __u) @@ -1298,8 +1402,8 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( { iterator __i = __u.begin(); while (__u.size() != 0) { - __table_.__emplace_unique(_VSTD::move( - __u.__table_.remove((__i++).__i_)->__value_.__nc)); + __table_.__emplace_unique( + __u.__table_.remove((__i++).__i_)->__value_.__move()); } } #if _LIBCPP_DEBUG_LEVEL >= 2 @@ -1385,7 +1489,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::operator[](const key_type& __k) { return __table_.__emplace_unique_key_args(__k, std::piecewise_construct, std::forward_as_tuple(__k), - std::forward_as_tuple()).first->__cc.second; + std::forward_as_tuple()).first->__get_value().second; } template @@ -1394,7 +1498,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::operator[](key_type&& __k) { return __table_.__emplace_unique_key_args(__k, std::piecewise_construct, std::forward_as_tuple(std::move(__k)), - std::forward_as_tuple()).first->__cc.second; + std::forward_as_tuple()).first->__get_value().second; } #else // _LIBCPP_CXX03_LANG @@ -1404,9 +1508,9 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::__construct_node_with_key(const { __node_allocator& __na = __table_.__node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__cc.first), __k); + __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__get_value().first), __k); __h.get_deleter().__first_constructed = true; - __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__cc.second)); + __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__get_value().second)); __h.get_deleter().__second_constructed = true; return _LIBCPP_EXPLICIT_MOVE(__h); // explicitly moved for C++03 } @@ -1500,7 +1604,6 @@ public: typedef _Pred key_equal; typedef _Alloc allocator_type; typedef pair value_type; - typedef pair __nc_value_type; typedef value_type& reference; typedef const value_type& const_reference; static_assert((is_same::value), @@ -1539,6 +1642,10 @@ public: typedef __hash_map_iterator local_iterator; typedef __hash_map_const_iterator const_local_iterator; +#if _LIBCPP_STD_VER > 14 + typedef 
__map_node_handle<__node, allocator_type> node_type; +#endif + _LIBCPP_INLINE_VISIBILITY unordered_multimap() _NOEXCEPT_(is_nothrow_default_constructible<__table>::value) @@ -1712,6 +1819,36 @@ public: _LIBCPP_INLINE_VISIBILITY void clear() _NOEXCEPT {__table_.clear();} +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY + iterator insert(node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to unordered_multimap::insert()"); + return __table_.template __node_handle_insert_multi( + _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + iterator insert(const_iterator __hint, node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to unordered_multimap::insert()"); + return __table_.template __node_handle_insert_multi( + __hint.__i_, _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(key_type const& __key) + { + return __table_.template __node_handle_extract(__key); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(const_iterator __it) + { + return __table_.template __node_handle_extract( + __it.__i_); + } +#endif + _LIBCPP_INLINE_VISIBILITY void swap(unordered_multimap& __u) _NOEXCEPT_(__is_nothrow_swappable<__table>::value) @@ -1915,8 +2052,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( while (__u.size() != 0) { __table_.__insert_multi( - _VSTD::move(__u.__table_.remove((__i++).__i_)->__value_.__nc) - ); + __u.__table_.remove((__i++).__i_)->__value_.__move()); } } #if _LIBCPP_DEBUG_LEVEL >= 2 diff --git a/include/c++/v1/unordered_set b/include/c++/v1/unordered_set index 3ae024a..a219fa6 100644 --- a/include/c++/v1/unordered_set +++ b/include/c++/v1/unordered_set @@ -43,6 +43,9 @@ public: typedef /unspecified/ local_iterator; typedef /unspecified/ const_local_iterator; + typedef unspecified node_type unspecified; // C++17 + typedef INSERT_RETURN_TYPE insert_return_type; // C++17 + unordered_set() noexcept( is_nothrow_default_constructible::value && @@ -113,6 +116,11 @@ public: void insert(InputIterator first, InputIterator last); void insert(initializer_list); + node_type extract(const_iterator position); // C++17 + node_type extract(const key_type& x); // C++17 + insert_return_type insert(node_type&& nh); // C++17 + iterator insert(const_iterator hint, node_type&& nh); // C++17 + iterator erase(const_iterator position); iterator erase(iterator position); // C++14 size_type erase(const key_type& k); @@ -191,6 +199,8 @@ public: typedef /unspecified/ local_iterator; typedef /unspecified/ const_local_iterator; + typedef unspecified node_type unspecified; // C++17 + unordered_multiset() noexcept( is_nothrow_default_constructible::value && @@ -261,6 +271,11 @@ public: void insert(InputIterator first, InputIterator last); void insert(initializer_list); + node_type extract(const_iterator position); // C++17 + node_type extract(const key_type& x); // C++17 + iterator insert(node_type&& nh); // C++17 + iterator insert(const_iterator hint, node_type&& nh); // C++17 + iterator erase(const_iterator position); iterator erase(iterator position); // C++14 size_type erase(const key_type& k); @@ -321,7 +336,9 @@ template #include <__config> #include <__hash_table> +#include <__node_handle> #include +#include #include <__debug> @@ -363,6 +380,11 @@ public: typedef typename __table::const_local_iterator local_iterator; typedef typename __table::const_local_iterator 
const_local_iterator; +#if _LIBCPP_STD_VER > 14 + typedef __set_node_handle node_type; + typedef __insert_return_type insert_return_type; +#endif + _LIBCPP_INLINE_VISIBILITY unordered_set() _NOEXCEPT_(is_nothrow_default_constructible<__table>::value) @@ -541,6 +563,35 @@ public: _LIBCPP_INLINE_VISIBILITY void clear() _NOEXCEPT {__table_.clear();} +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY + insert_return_type insert(node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to unordered_set::insert()"); + return __table_.template __node_handle_insert_unique< + node_type, insert_return_type>(_VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + iterator insert(const_iterator __h, node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to unordered_set::insert()"); + return __table_.template __node_handle_insert_unique( + __h, _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(key_type const& __key) + { + return __table_.template __node_handle_extract(__key); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(const_iterator __it) + { + return __table_.template __node_handle_extract(__it); + } +#endif + _LIBCPP_INLINE_VISIBILITY void swap(unordered_set& __u) _NOEXCEPT_(__is_nothrow_swappable<__table>::value) @@ -883,6 +934,10 @@ public: typedef typename __table::const_local_iterator local_iterator; typedef typename __table::const_local_iterator const_local_iterator; +#if _LIBCPP_STD_VER > 14 + typedef __set_node_handle node_type; +#endif + _LIBCPP_INLINE_VISIBILITY unordered_multiset() _NOEXCEPT_(is_nothrow_default_constructible<__table>::value) @@ -1019,6 +1074,36 @@ public: _LIBCPP_INLINE_VISIBILITY void insert(_InputIterator __first, _InputIterator __last); +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY + iterator insert(node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to unordered_multiset::insert()"); + return __table_.template __node_handle_insert_multi( + _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + iterator insert(const_iterator __hint, node_type&& __nh) + { + _LIBCPP_ASSERT(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to unordered_multiset::insert()"); + return __table_.template __node_handle_insert_multi( + __hint, _VSTD::move(__nh)); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(const_iterator __position) + { + return __table_.template __node_handle_extract( + __position); + } + _LIBCPP_INLINE_VISIBILITY + node_type extract(key_type const& __key) + { + return __table_.template __node_handle_extract(__key); + } +#endif + _LIBCPP_INLINE_VISIBILITY iterator erase(const_iterator __p) {return __table_.erase(__p);} _LIBCPP_INLINE_VISIBILITY diff --git a/include/c++/v1/utility b/include/c++/v1/utility index f11d580..5388585 100644 --- a/include/c++/v1/utility +++ b/include/c++/v1/utility @@ -14,6 +14,8 @@ /* utility synopsis +#include + namespace std { @@ -201,6 +203,7 @@ template #include #include #include +#include #include <__debug> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -293,7 +296,7 @@ template void as_const(const _Tp&&) = delete; #endif struct _LIBCPP_TEMPLATE_VIS piecewise_construct_t { }; -#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_UTILITY) +#if defined(_LIBCPP_CXX03_LANG) 
|| defined(_LIBCPP_BUILDING_LIBRARY) extern const piecewise_construct_t piecewise_construct;// = piecewise_construct_t(); #else /* _LIBCPP_INLINE_VAR */ constexpr piecewise_construct_t piecewise_construct = piecewise_construct_t(); @@ -1451,7 +1454,7 @@ struct _LIBCPP_TEMPLATE_VIS hash size_t operator()(float __v) const _NOEXCEPT { // -0.0 and 0.0 should return same hash - if (__v == 0) + if (__v == 0.0) return 0; return __scalar_hash::operator()(__v); } @@ -1465,7 +1468,7 @@ struct _LIBCPP_TEMPLATE_VIS hash size_t operator()(double __v) const _NOEXCEPT { // -0.0 and 0.0 should return same hash - if (__v == 0) + if (__v == 0.0) return 0; return __scalar_hash::operator()(__v); } @@ -1479,7 +1482,7 @@ struct _LIBCPP_TEMPLATE_VIS hash size_t operator()(long double __v) const _NOEXCEPT { // -0.0 and 0.0 should return same hash - if (__v == 0) + if (__v == 0.0) return 0; #if defined(__i386__) // Zero out padding bits diff --git a/include/c++/v1/variant b/include/c++/v1/variant index f9098f4..f9505bf 100644 --- a/include/c++/v1/variant +++ b/include/c++/v1/variant @@ -208,6 +208,7 @@ namespace std { #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -1320,7 +1321,7 @@ constexpr bool holds_alternative(const variant<_Types...>& __v) noexcept { template inline _LIBCPP_INLINE_VISIBILITY -static constexpr auto&& __generic_get(_Vp&& __v) { +constexpr auto&& __generic_get(_Vp&& __v) { using __variant_detail::__access::__variant; if (!__holds_alternative<_Ip>(__v)) { __throw_bad_variant_access(); @@ -1437,6 +1438,16 @@ get_if(const variant<_Types...>* __v) noexcept { return _VSTD::get_if<__find_exactly_one_t<_Tp, _Types...>::value>(__v); } +template +struct __convert_to_bool { + template + _LIBCPP_INLINE_VISIBILITY constexpr bool operator()(_T1 && __t1, _T2&& __t2) const { + static_assert(std::is_convertible(__t1), _VSTD::forward<_T2>(__t2))), bool>::value, + "the relational operator does not return a type which is implicitly convertible to bool"); + return _Operator{}(_VSTD::forward<_T1>(__t1), _VSTD::forward<_T2>(__t2)); + } +}; + template inline _LIBCPP_INLINE_VISIBILITY constexpr bool operator==(const variant<_Types...>& __lhs, @@ -1444,7 +1455,7 @@ constexpr bool operator==(const variant<_Types...>& __lhs, using __variant_detail::__visitation::__variant; if (__lhs.index() != __rhs.index()) return false; if (__lhs.valueless_by_exception()) return true; - return __variant::__visit_value_at(__lhs.index(), equal_to<>{}, __lhs, __rhs); + return __variant::__visit_value_at(__lhs.index(), __convert_to_bool>{}, __lhs, __rhs); } template @@ -1455,7 +1466,7 @@ constexpr bool operator!=(const variant<_Types...>& __lhs, if (__lhs.index() != __rhs.index()) return true; if (__lhs.valueless_by_exception()) return false; return __variant::__visit_value_at( - __lhs.index(), not_equal_to<>{}, __lhs, __rhs); + __lhs.index(), __convert_to_bool>{}, __lhs, __rhs); } template @@ -1467,7 +1478,7 @@ constexpr bool operator<(const variant<_Types...>& __lhs, if (__lhs.valueless_by_exception()) return true; if (__lhs.index() < __rhs.index()) return true; if (__lhs.index() > __rhs.index()) return false; - return __variant::__visit_value_at(__lhs.index(), less<>{}, __lhs, __rhs); + return __variant::__visit_value_at(__lhs.index(), __convert_to_bool>{}, __lhs, __rhs); } template @@ -1479,7 +1490,7 @@ constexpr bool operator>(const variant<_Types...>& __lhs, if (__rhs.valueless_by_exception()) return true; if (__lhs.index() > __rhs.index()) return true; if 
(__lhs.index() < __rhs.index()) return false; - return __variant::__visit_value_at(__lhs.index(), greater<>{}, __lhs, __rhs); + return __variant::__visit_value_at(__lhs.index(), __convert_to_bool>{}, __lhs, __rhs); } template @@ -1492,7 +1503,7 @@ constexpr bool operator<=(const variant<_Types...>& __lhs, if (__lhs.index() < __rhs.index()) return true; if (__lhs.index() > __rhs.index()) return false; return __variant::__visit_value_at( - __lhs.index(), less_equal<>{}, __lhs, __rhs); + __lhs.index(), __convert_to_bool>{}, __lhs, __rhs); } template @@ -1505,7 +1516,7 @@ constexpr bool operator>=(const variant<_Types...>& __lhs, if (__lhs.index() > __rhs.index()) return true; if (__lhs.index() < __rhs.index()) return false; return __variant::__visit_value_at( - __lhs.index(), greater_equal<>{}, __lhs, __rhs); + __lhs.index(), __convert_to_bool>{}, __lhs, __rhs); } template diff --git a/include/c++/v1/vector b/include/c++/v1/vector index 54b1e88..de7de09 100644 --- a/include/c++/v1/vector +++ b/include/c++/v1/vector @@ -244,6 +244,10 @@ public: bool __invariants() const; }; +template ::value_type>> + vector(InputIterator, InputIterator, Allocator = Allocator()) + -> vector::value_type, Allocator>; + template struct hash>; template bool operator==(const vector& x, const vector& y); @@ -272,6 +276,7 @@ void swap(vector& x, vector& y) #include #include #include +#include #include <__split_buffer> #include <__functional_base> @@ -291,7 +296,7 @@ template class __vector_base_common { protected: - _LIBCPP_ALWAYS_INLINE __vector_base_common() {} + _LIBCPP_INLINE_VISIBILITY __vector_base_common() {} _LIBCPP_NORETURN void __throw_length_error() const; _LIBCPP_NORETURN void __throw_out_of_range() const; }; @@ -316,13 +321,14 @@ template class __vector_base : protected __vector_base_common { -protected: - typedef _Tp value_type; +public: typedef _Allocator allocator_type; typedef allocator_traits __alloc_traits; + typedef typename __alloc_traits::size_type size_type; +protected: + typedef _Tp value_type; typedef value_type& reference; typedef const value_type& const_reference; - typedef typename __alloc_traits::size_type size_type; typedef typename __alloc_traits::difference_type difference_type; typedef typename __alloc_traits::pointer pointer; typedef typename __alloc_traits::const_pointer const_pointer; @@ -350,6 +356,9 @@ protected: __vector_base() _NOEXCEPT_(is_nothrow_default_constructible::value); _LIBCPP_INLINE_VISIBILITY __vector_base(const allocator_type& __a); +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY __vector_base(allocator_type&& __a) _NOEXCEPT; +#endif ~__vector_base(); _LIBCPP_INLINE_VISIBILITY @@ -433,6 +442,15 @@ __vector_base<_Tp, _Allocator>::__vector_base(const allocator_type& __a) { } +#ifndef _LIBCPP_CXX03_LANG +template +inline _LIBCPP_INLINE_VISIBILITY +__vector_base<_Tp, _Allocator>::__vector_base(allocator_type&& __a) _NOEXCEPT + : __begin_(nullptr), + __end_(nullptr), + __end_cap_(nullptr, std::move(__a)) {} +#endif + template __vector_base<_Tp, _Allocator>::~__vector_base() { @@ -492,8 +510,8 @@ public: #if _LIBCPP_STD_VER > 11 explicit vector(size_type __n, const allocator_type& __a); #endif - vector(size_type __n, const_reference __x); - vector(size_type __n, const_reference __x, const allocator_type& __a); + vector(size_type __n, const value_type& __x); + vector(size_type __n, const value_type& __x, const allocator_type& __a); template vector(_InputIterator __first, typename enable_if<__is_input_iterator <_InputIterator>::value && @@ -523,13 +541,14 @@ public: 
value_type, typename iterator_traits<_ForwardIterator>::reference>::value>::type* = 0); -#if _LIBCPP_DEBUG_LEVEL >= 2 _LIBCPP_INLINE_VISIBILITY ~vector() { + __annotate_delete(); +#if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__erase_c(this); - } #endif + } vector(const vector& __x); vector(const vector& __x, const allocator_type& __a); @@ -776,8 +795,8 @@ public: private: _LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators(); _LIBCPP_INLINE_VISIBILITY void __invalidate_iterators_past(pointer __new_last); - void allocate(size_type __n); - void deallocate() _NOEXCEPT; + void __vallocate(size_type __n); + void __vdeallocate() _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY size_type __recommend(size_type __new_size) const; void __construct_at_end(size_type __n); _LIBCPP_INLINE_VISIBILITY @@ -890,6 +909,22 @@ private: }; +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template::value_type>, + class = typename enable_if<__is_allocator<_Alloc>::value, void>::type + > +vector(_InputIterator, _InputIterator) + -> vector::value_type, _Alloc>; + +template::value, void>::type + > +vector(_InputIterator, _InputIterator, _Alloc) + -> vector::value_type, _Alloc>; +#endif + template void vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v) @@ -930,7 +965,7 @@ vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer void -vector<_Tp, _Allocator>::allocate(size_type __n) +vector<_Tp, _Allocator>::__vallocate(size_type __n) { if (__n > max_size()) this->__throw_length_error(); @@ -941,7 +976,7 @@ vector<_Tp, _Allocator>::allocate(size_type __n) template void -vector<_Tp, _Allocator>::deallocate() _NOEXCEPT +vector<_Tp, _Allocator>::__vdeallocate() _NOEXCEPT { if (this->__begin_ != nullptr) { @@ -1077,7 +1112,7 @@ vector<_Tp, _Allocator>::vector(size_type __n) #endif if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__n); } } @@ -1092,27 +1127,27 @@ vector<_Tp, _Allocator>::vector(size_type __n, const allocator_type& __a) #endif if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__n); } } #endif template -vector<_Tp, _Allocator>::vector(size_type __n, const_reference __x) +vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x) { #if _LIBCPP_DEBUG_LEVEL >= 2 __get_db()->__insert_c(this); #endif if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__n, __x); } } template -vector<_Tp, _Allocator>::vector(size_type __n, const_reference __x, const allocator_type& __a) +vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x, const allocator_type& __a) : __base(__a) { #if _LIBCPP_DEBUG_LEVEL >= 2 @@ -1120,7 +1155,7 @@ vector<_Tp, _Allocator>::vector(size_type __n, const_reference __x, const alloca #endif if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__n, __x); } } @@ -1174,7 +1209,7 @@ vector<_Tp, _Allocator>::vector(_ForwardIterator __first, size_type __n = static_cast(_VSTD::distance(__first, __last)); if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__first, __last, __n); } } @@ -1194,7 +1229,7 @@ vector<_Tp, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __las size_type __n = static_cast(_VSTD::distance(__first, __last)); if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__first, __last, __n); } } @@ -1209,7 +1244,7 @@ vector<_Tp, _Allocator>::vector(const vector& __x) size_type __n = __x.size(); if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__x.__begin_, __x.__end_, __n); } } @@ -1224,7 +1259,7 @@ 
vector<_Tp, _Allocator>::vector(const vector& __x, const allocator_type& __a) size_type __n = __x.size(); if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__x.__begin_, __x.__end_, __n); } } @@ -1285,7 +1320,7 @@ vector<_Tp, _Allocator>::vector(initializer_list __il) #endif if (__il.size() > 0) { - allocate(__il.size()); + __vallocate(__il.size()); __construct_at_end(__il.begin(), __il.end(), __il.size()); } } @@ -1300,7 +1335,7 @@ vector<_Tp, _Allocator>::vector(initializer_list __il, const allocat #endif if (__il.size() > 0) { - allocate(__il.size()); + __vallocate(__il.size()); __construct_at_end(__il.begin(), __il.end(), __il.size()); } } @@ -1335,7 +1370,7 @@ void vector<_Tp, _Allocator>::__move_assign(vector& __c, true_type) _NOEXCEPT_(is_nothrow_move_assignable::value) { - deallocate(); + __vdeallocate(); __base::__move_assign_alloc(__c); // this can throw this->__begin_ = __c.__begin_; this->__end_ = __c.__end_; @@ -1410,8 +1445,8 @@ vector<_Tp, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __las } else { - deallocate(); - allocate(__recommend(__new_size)); + __vdeallocate(); + __vallocate(__recommend(__new_size)); __construct_at_end(__first, __last, __new_size); } __invalidate_all_iterators(); @@ -1432,8 +1467,8 @@ vector<_Tp, _Allocator>::assign(size_type __n, const_reference __u) } else { - deallocate(); - allocate(__recommend(static_cast(__n))); + __vdeallocate(); + __vallocate(__recommend(static_cast(__n))); __construct_at_end(__n, __u); } __invalidate_all_iterators(); @@ -2416,11 +2451,11 @@ public: private: _LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators(); - void allocate(size_type __n); - void deallocate() _NOEXCEPT; + void __vallocate(size_type __n); + void __vdeallocate() _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY static size_type __align_it(size_type __new_size) _NOEXCEPT - {return __new_size + (__bits_per_word-1) & ~((size_type)__bits_per_word-1);}; + {return __new_size + (__bits_per_word-1) & ~((size_type)__bits_per_word-1);} _LIBCPP_INLINE_VISIBILITY size_type __recommend(size_type __new_size) const; _LIBCPP_INLINE_VISIBILITY void __construct_at_end(size_type __n, bool __x); template @@ -2455,7 +2490,7 @@ private: void __copy_assign_alloc(const vector& __c, true_type) { if (__alloc() != __c.__alloc()) - deallocate(); + __vdeallocate(); __alloc() = __c.__alloc(); } @@ -2511,7 +2546,7 @@ vector::__invalidate_all_iterators() // Postcondition: size() == 0 template void -vector::allocate(size_type __n) +vector::__vallocate(size_type __n) { if (__n > max_size()) this->__throw_length_error(); @@ -2523,7 +2558,7 @@ vector::allocate(size_type __n) template void -vector::deallocate() _NOEXCEPT +vector::__vdeallocate() _NOEXCEPT { if (this->__begin_ != nullptr) { @@ -2620,7 +2655,7 @@ vector::vector(size_type __n) { if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__n, false); } } @@ -2634,7 +2669,7 @@ vector::vector(size_type __n, const allocator_type& __a) { if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__n, false); } } @@ -2648,7 +2683,7 @@ vector::vector(size_type __n, const value_type& __x) { if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__n, __x); } } @@ -2661,7 +2696,7 @@ vector::vector(size_type __n, const value_type& __x, const all { if (__n > 0) { - allocate(__n); + __vallocate(__n); __construct_at_end(__n, __x); } } @@ -2731,7 +2766,7 @@ vector::vector(_ForwardIterator __first, _ForwardIterator __la size_type __n = static_cast(_VSTD::distance(__first, 
@@ -2731,7 +2766,7 @@ vector<bool, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __la
         size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
         if (__n > 0)
         {
-            allocate(__n);
+            __vallocate(__n);
             __construct_at_end(__first, __last);
         }
     }
@@ -2747,7 +2782,7 @@ vector<bool, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __la
         size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
         if (__n > 0)
         {
-            allocate(__n);
+            __vallocate(__n);
             __construct_at_end(__first, __last);
         }
     }
@@ -2763,7 +2798,7 @@ vector<bool, _Allocator>::vector(initializer_list<value_type> __il)
     size_type __n = static_cast<size_type>(__il.size());
     if (__n > 0)
     {
-        allocate(__n);
+        __vallocate(__n);
         __construct_at_end(__il.begin(), __il.end());
     }
 }
@@ -2777,7 +2812,7 @@ vector<bool, _Allocator>::vector(initializer_list<value_type> __il, const alloca
     size_type __n = static_cast<size_type>(__il.size());
     if (__n > 0)
     {
-        allocate(__n);
+        __vallocate(__n);
         __construct_at_end(__il.begin(), __il.end());
     }
 }
@@ -2800,7 +2835,7 @@ vector<bool, _Allocator>::vector(const vector& __v)
 {
     if (__v.size() > 0)
     {
-        allocate(__v.size());
+        __vallocate(__v.size());
         __construct_at_end(__v.begin(), __v.end());
     }
 }
@@ -2813,7 +2848,7 @@ vector<bool, _Allocator>::vector(const vector& __v, const allocator_type& __a)
 {
     if (__v.size() > 0)
     {
-        allocate(__v.size());
+        __vallocate(__v.size());
         __construct_at_end(__v.begin(), __v.end());
     }
 }
@@ -2829,8 +2864,8 @@ vector<bool, _Allocator>::operator=(const vector& __v)
     {
         if (__v.__size_ > capacity())
         {
-            deallocate();
-            allocate(__v.__size_);
+            __vdeallocate();
+            __vallocate(__v.__size_);
         }
         _VSTD::copy(__v.__begin_, __v.__begin_ + __external_cap_to_internal(__v.__size_), __begin_);
     }
@@ -2842,17 +2877,15 @@ vector<bool, _Allocator>::operator=(const vector& __v)
 #ifndef _LIBCPP_CXX03_LANG
 
 template <class _Allocator>
-inline _LIBCPP_INLINE_VISIBILITY
-vector<bool, _Allocator>::vector(vector&& __v)
+inline _LIBCPP_INLINE_VISIBILITY vector<bool, _Allocator>::vector(vector&& __v)
 #if _LIBCPP_STD_VER > 14
-        _NOEXCEPT
+    _NOEXCEPT
 #else
-        _NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
+    _NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
 #endif
     : __begin_(__v.__begin_),
       __size_(__v.__size_),
-      __cap_alloc_(__v.__cap_alloc_)
-{
+      __cap_alloc_(std::move(__v.__cap_alloc_)) {
     __v.__begin_ = nullptr;
     __v.__size_ = 0;
     __v.__cap() = 0;
@@ -2874,7 +2907,7 @@ vector<bool, _Allocator>::vector(vector&& __v, const allocator_type& __a)
     }
     else if (__v.size() > 0)
     {
-        allocate(__v.size());
+        __vallocate(__v.size());
         __construct_at_end(__v.begin(), __v.end());
     }
 }
@@ -2905,7 +2938,7 @@ void
 vector<bool, _Allocator>::__move_assign(vector& __c, true_type)
     _NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
 {
-    deallocate();
+    __vdeallocate();
     __move_assign_alloc(__c);
     this->__begin_ = __c.__begin_;
     this->__size_ = __c.__size_;
@@ -2970,8 +3003,8 @@ vector<bool, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __la
     {
         if (__n > capacity())
         {
-            deallocate();
-            allocate(__n);
+            __vdeallocate();
+            __vallocate(__n);
         }
         __construct_at_end(__first, __last);
     }
@@ -2984,7 +3017,7 @@ vector<bool, _Allocator>::reserve(size_type __n)
     if (__n > capacity())
     {
         vector __v(this->__alloc());
-        __v.allocate(__n);
+        __v.__vallocate(__n);
         __v.__construct_at_end(this->begin(), this->end());
         swap(__v);
         __invalidate_all_iterators();
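The move-constructor hunk above makes `vector<bool>` move its `__cap_alloc_` pair and null out the source, so with this implementation a moved-from `vector<bool>` is observably empty with zero capacity. A small sanity check of that behaviour; note the standard only guarantees a valid-but-unspecified state in general, this is specific to the code shown:

```cpp
#include <cassert>
#include <utility>
#include <vector>

int main() {
    std::vector<bool> a(100, true);
    std::vector<bool> b(std::move(a));   // steals a's word buffer
    assert(b.size() == 100);
    assert(a.empty());                   // __size_ was reset to 0
    assert(a.capacity() == 0);           // __cap() was reset to 0
    return 0;
}
```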
diff --git a/include/c++/v1/version b/include/c++/v1/version
index 23a872e..80c2797 100644
--- a/include/c++/v1/version
+++ b/include/c++/v1/version
@@ -14,6 +14,85 @@
 /*
 version synopsis
 
+ Table 35 — Standard library feature-test macros
+Macro name                                        Value    Headers
+__cpp_lib_addressof_constexpr                     201603L  <memory>
+__cpp_lib_allocator_traits_is_always_equal        201411L  <memory> <scoped_allocator> <string>
+                                                           <deque> <forward_list> <list> <vector>
+                                                           <map> <set> <unordered_map> <unordered_set>
+__cpp_lib_any                                     201606L  <any>
+__cpp_lib_apply                                   201603L  <tuple>
+__cpp_lib_array_constexpr                         201603L  <iterator> <array>
+__cpp_lib_as_const                                201510L  <utility>
+__cpp_lib_atomic_is_always_lock_free              201603L  <atomic>
+__cpp_lib_atomic_ref                              201806L  <atomic>
+__cpp_lib_bit_cast                                201806L  <bit>
+__cpp_lib_bool_constant                           201505L  <type_traits>
+__cpp_lib_boyer_moore_searcher                    201603L  <functional>
+__cpp_lib_byte                                    201603L  <cstddef>
+__cpp_lib_chrono                                  201611L  <chrono>
+__cpp_lib_clamp                                   201603L  <algorithm>
+__cpp_lib_complex_udls                            201309L  <complex>
+__cpp_lib_concepts                                201806L  <concepts>
+__cpp_lib_constexpr_swap_algorithms               201806L  <algorithm>
+__cpp_lib_enable_shared_from_this                 201603L  <memory>
+__cpp_lib_exchange_function                       201304L  <utility>
+__cpp_lib_execution                               201603L  <execution>
+__cpp_lib_filesystem                              201703L  <filesystem>
+__cpp_lib_gcd_lcm                                 201606L  <numeric>
+__cpp_lib_generic_associative_lookup              201304L  <map> <set>
+__cpp_lib_hardware_interference_size              201703L  <new>
+__cpp_lib_has_unique_object_representations       201606L  <type_traits>
+__cpp_lib_hypot                                   201603L  <cmath>
+__cpp_lib_incomplete_container_elements           201505L  <forward_list> <list> <vector>
+__cpp_lib_integer_sequence                        201304L  <utility>
+__cpp_lib_integral_constant_callable              201304L  <type_traits>
+__cpp_lib_invoke                                  201411L  <functional>
+__cpp_lib_is_aggregate                            201703L  <type_traits>
+__cpp_lib_is_final                                201402L  <type_traits>
+__cpp_lib_is_invocable                            201703L  <type_traits>
+__cpp_lib_is_null_pointer                         201309L  <type_traits>
+__cpp_lib_is_swappable                            201603L  <type_traits>
+__cpp_lib_launder                                 201606L  <new>
+__cpp_lib_list_remove_return_type                 201806L  <forward_list> <list>
+__cpp_lib_logical_traits                          201510L  <type_traits>
+__cpp_lib_make_from_tuple                         201606L  <tuple>
+__cpp_lib_make_reverse_iterator                   201402L  <iterator>
+__cpp_lib_make_unique                             201304L  <memory>
+__cpp_lib_map_try_emplace                         201411L  <map>
+__cpp_lib_math_special_functions                  201603L  <cmath>
+__cpp_lib_memory_resource                         201603L  <memory_resource>
+__cpp_lib_node_extract                            201606L  <map> <set> <unordered_map> <unordered_set>
+__cpp_lib_nonmember_container_access              201411L  <iterator> <array> <deque> <forward_list>
+                                                           <list> <map> <regex> <set> <string>
+                                                           <unordered_map> <unordered_set> <vector>
+__cpp_lib_not_fn                                  201603L  <functional>
+__cpp_lib_null_iterators                          201304L  <iterator>
+__cpp_lib_optional                                201606L  <optional>
+__cpp_lib_parallel_algorithm                      201603L  <algorithm> <numeric>
+__cpp_lib_quoted_string_io                        201304L  <iomanip>
+__cpp_lib_raw_memory_algorithms                   201606L  <memory>
+__cpp_lib_result_of_sfinae                        201210L  <functional> <type_traits>
+__cpp_lib_robust_nonmodifying_seq_ops             201304L  <algorithm>
+__cpp_lib_sample                                  201603L  <algorithm>
+__cpp_lib_scoped_lock                             201703L  <mutex>
+__cpp_lib_shared_mutex                            201505L  <shared_mutex>
+__cpp_lib_shared_ptr_arrays                       201611L  <memory>
+__cpp_lib_shared_ptr_weak_type                    201606L  <memory>
+__cpp_lib_shared_timed_mutex                      201402L  <shared_mutex>
+__cpp_lib_string_udls                             201304L  <string>
+__cpp_lib_string_view                             201606L  <string_view>
+__cpp_lib_to_chars                                201611L  <charconv>
+__cpp_lib_transformation_trait_aliases            201304L  <type_traits>
+__cpp_lib_transparent_operators                   201510L  <memory> <functional>
+__cpp_lib_tuple_element_t                         201402L  <tuple>
+__cpp_lib_tuples_by_type                          201304L  <utility> <tuple>
+__cpp_lib_type_trait_variable_templates           201510L  <type_traits>
+__cpp_lib_uncaught_exceptions                     201411L  <exception>
+__cpp_lib_unordered_map_try_emplace               201411L  <unordered_map>
+__cpp_lib_variant                                 201606L  <variant>
+__cpp_lib_void_t                                  201411L  <type_traits>
+
 */
 
 #include <__config>
@@ -22,4 +101,17 @@
 #pragma GCC system_header
 #endif
 
+#if _LIBCPP_STD_VER > 11
+#endif
+
+#if _LIBCPP_STD_VER > 14
+# define __cpp_lib_atomic_is_always_lock_free          201603L
+# define __cpp_lib_filesystem                          201703L
+# define __cpp_lib_invoke                              201411L
+# define __cpp_lib_void_t                              201411L
+#endif
+
+#if _LIBCPP_STD_VER > 17
+#endif
+
 #endif // _LIBCPP_VERSIONH
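The `__cpp_lib_*` definitions added to `<version>` above are meant to be tested before relying on a library feature. A hedged sketch of the usual consumption pattern, assuming a C++17 build with this toolchain (the experimental fallback branch is illustrative):

```cpp
#include <version>  // the header patched above

#if defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L
#include <filesystem>
namespace fs = std::filesystem;   // feature advertised, use the real thing
#else
#include <experimental/filesystem>  // fallback for toolchains without the macro
namespace fs = std::experimental::filesystem;
#endif
```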
diff --git a/lib/clang/7.0.0/asan_blacklist.txt b/lib/clang/7.0.0/asan_blacklist.txt
deleted file mode 100644
index c25921f..0000000
--- a/lib/clang/7.0.0/asan_blacklist.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# Blacklist for AddressSanitizer. Turns off instrumentation of particular
-# functions or sources. Use with care. You may set location of blacklist
-# at compile-time using -fsanitize-blacklist= flag.
-
-# Example usage:
-# fun:*bad_function_name*
-# src:file_with_tricky_code.cc
-# global:*global_with_bad_access_or_initialization*
-# global:*global_with_initialization_issues*=init
-# type:*Namespace::ClassName*=init
-
-# Stack buffer overflow in VC/INCLUDE/xlocnum, see http://goo.gl/L4qqUG
-fun:*_Find_elem@*@std*
diff --git a/lib/clang/7.0.0/cfi_blacklist.txt b/lib/clang/7.0.0/cfi_blacklist.txt
deleted file mode 100644
index 3d73508..0000000
--- a/lib/clang/7.0.0/cfi_blacklist.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-[cfi-unrelated-cast]
-# The specification of std::get_temporary_buffer mandates a cast to
-# uninitialized T* (libstdc++, libc++, MSVC stdlib).
-fun:_ZSt20get_temporary_buffer*
-fun:_ZNSt3__120get_temporary_buffer*
-fun:*get_temporary_buffer@.*@std@@*
-
-# STL address-of magic (libstdc++, libc++).
-fun:*__addressof*
-fun:_ZNSt3__19addressof*
-
-# Windows C++ stdlib headers that contain bad unrelated casts.
-src:*xmemory0
-src:*xstddef
-
-# std::_Sp_counted_ptr_inplace::_Sp_counted_ptr_inplace() (libstdc++).
-# This ctor is used by std::make_shared and needs to cast to uninitialized T*
-# in order to call std::allocator_traits::construct.
-fun:_ZNSt23_Sp_counted_ptr_inplace*
diff --git a/lib/clang/7.0.0/include/avx512ifmavlintrin.h b/lib/clang/7.0.0/include/avx512ifmavlintrin.h
deleted file mode 100644
index 131ee5c..0000000
--- a/lib/clang/7.0.0/include/avx512ifmavlintrin.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------===
- *
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- *===-----------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef __IFMAVLINTRIN_H
-#define __IFMAVLINTRIN_H
-
-/* Define the default attributes for the functions in this file.
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"))) - - - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X, - (__v2di) __Y, - (__v2di) __Z, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W, - (__v2di) __X, - (__v2di) __Y, - (__mmask8) __M); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X, - (__v2di) __Y, - (__v2di) __Z, - (__mmask8) __M); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X, - (__v4di) __Y, - (__v4di) __Z, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) -{ - return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W, - (__v4di) __X, - (__v4di) __Y, - (__mmask8) __M); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X, - (__v4di) __Y, - (__v4di) __Z, - (__mmask8) __M); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X, - (__v2di) __Y, - (__v2di) __Z, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W, - (__v2di) __X, - (__v2di) __Y, - (__mmask8) __M); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X, - (__v2di) __Y, - (__v2di) __Z, - (__mmask8) __M); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X, - (__v4di) __Y, - (__v4di) __Z, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) -{ - return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W, - (__v4di) __X, - (__v4di) __Y, - (__mmask8) __M); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X, - (__v4di) __Y, - (__v4di) __Z, - (__mmask8) __M); -} - - -#undef __DEFAULT_FN_ATTRS - -#endif diff --git a/lib/clang/7.0.0/include/avx512vbmivlintrin.h b/lib/clang/7.0.0/include/avx512vbmivlintrin.h deleted file mode 100644 index 105c6d1..0000000 --- a/lib/clang/7.0.0/include/avx512vbmivlintrin.h +++ /dev/null @@ -1,247 +0,0 @@ -/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== - * - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * 
of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - *===-----------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error "Never use directly; include instead." -#endif - -#ifndef __VBMIVLINTRIN_H -#define __VBMIVLINTRIN_H - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"))) - - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U, - __m128i __B) -{ - return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A, - (__v16qi) __I - /* idx */ , - (__v16qi) __B, - (__mmask16) - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I, - __mmask32 __U, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A, - (__v32qi) __I - /* idx */ , - (__v32qi) __B, - (__mmask32) - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I - /* idx */ , - (__v16qi) __A, - (__v16qi) __B, - (__mmask16) - - 1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I, - __m128i __B) -{ - return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I - /* idx */ , - (__v16qi) __A, - (__v16qi) __B, - (__mmask16) - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I, - __m128i __B) -{ - return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I - /* idx */ , - (__v16qi) __A, - (__v16qi) __B, - (__mmask16) - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I - /* idx */ , - (__v32qi) __A, - (__v32qi) __B, - (__mmask32) - - 1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U, - __m256i __I, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I - /* idx */ , - (__v32qi) __A, - (__v32qi) __B, - (__mmask32) - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A, - __m256i __I, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I - /* idx */ 
, - (__v32qi) __A, - (__v32qi) __B, - (__mmask32) - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_permutexvar_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, - (__v16qi) __A, - (__v16qi) _mm_undefined_si128 (), - (__mmask16) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, - (__v16qi) __A, - (__v16qi) _mm_setzero_si128 (), - (__mmask16) __M); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, - __m128i __B) -{ - return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, - (__v16qi) __A, - (__v16qi) __W, - (__mmask16) __M); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, - (__v32qi) __A, - (__v32qi) _mm256_undefined_si256 (), - (__mmask32) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, - __m256i __B) -{ - return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, - (__v32qi) __A, - (__v32qi) _mm256_setzero_si256 (), - (__mmask32) __M); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, - __m256i __B) -{ - return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, - (__v32qi) __A, - (__v32qi) __W, - (__mmask32) __M); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, - (__v16qi) __Y, - (__v16qi) __W, - (__mmask16) __M); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, - (__v16qi) __Y, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) __M); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, - (__v16qi) __Y, - (__v16qi) - _mm_undefined_si128 (), - (__mmask16) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) -{ - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, - (__v32qi) __Y, - (__v32qi) __W, - (__mmask32) __M); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y) -{ - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, - (__v32qi) __Y, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) __M); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y) -{ - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, - (__v32qi) __Y, - (__v32qi) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - - -#undef __DEFAULT_FN_ATTRS - -#endif diff --git a/lib/clang/7.0.0/include/avx512vlvbmi2intrin.h b/lib/clang/7.0.0/include/avx512vlvbmi2intrin.h deleted file mode 100644 index d1ec497..0000000 --- a/lib/clang/7.0.0/include/avx512vlvbmi2intrin.h +++ /dev/null @@ -1,748 +0,0 @@ -/*===------------- avx512vlvbmi2intrin.h - VBMI2 
intrinsics -----------------=== - * - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - *===-----------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error "Never use directly; include instead." -#endif - -#ifndef __AVX512VLVBMI2INTRIN_H -#define __AVX512VLVBMI2INTRIN_H - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"))) - -static __inline __m128i __DEFAULT_FN_ATTRS -_mm128_setzero_hi(void) { - return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }; -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) -{ - return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, - (__v8hi) __S, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D) -{ - return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, - (__v8hi) _mm128_setzero_hi(), - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) -{ - return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, - (__v16qi) __S, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D) -{ - return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, - (__v16qi) _mm128_setzero_hi(), - __U); -} - -static __inline__ void __DEFAULT_FN_ATTRS -_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) -{ - __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, - __U); -} - -static __inline__ void __DEFAULT_FN_ATTRS -_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) -{ - __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) -{ - return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, - (__v8hi) __S, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D) -{ - return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, - (__v8hi) _mm128_setzero_hi(), - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) -{ - return (__m128i) __builtin_ia32_expandqi128_mask 
((__v16qi) __D, - (__v16qi) __S, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D) -{ - return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, - (__v16qi) _mm128_setzero_hi(), - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) -{ - return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, - (__v8hi) __S, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) -{ - return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, - (__v8hi) _mm128_setzero_hi(), - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) -{ - return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, - (__v16qi) __S, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) -{ - return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, - (__v16qi) _mm128_setzero_hi(), - __U); -} - -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_setzero_hi(void) { - return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) -{ - return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, - (__v16hi) __S, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) -{ - return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, - (__v16hi) _mm256_setzero_hi(), - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) -{ - return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, - (__v32qi) __S, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) -{ - return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, - (__v32qi) _mm256_setzero_hi(), - __U); -} - -static __inline__ void __DEFAULT_FN_ATTRS -_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) -{ - __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, - __U); -} - -static __inline__ void __DEFAULT_FN_ATTRS -_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) -{ - __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) -{ - return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, - (__v16hi) __S, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) -{ - return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, - (__v16hi) _mm256_setzero_hi(), - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) -{ - return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, - (__v32qi) __S, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) -{ - return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, - (__v32qi) _mm256_setzero_hi(), - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS 
-_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) -{ - return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, - (__v16hi) __S, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) -{ - return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, - (__v16hi) _mm256_setzero_hi(), - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) -{ - return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, - (__v32qi) __S, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) -{ - return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, - (__v32qi) _mm256_setzero_hi(), - __U); -} - -#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ - (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \ - (__v4di)(B), \ - (int)(I), \ - (__v4di)(S), \ - (__mmask8)(U)); }) - -#define _mm256_maskz_shldi_epi64(U, A, B, I) \ - _mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) - -#define _mm256_shldi_epi64(A, B, I) \ - _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ - (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \ - (__v2di)(B), \ - (int)(I), \ - (__v2di)(S), \ - (__mmask8)(U)); }) - -#define _mm128_maskz_shldi_epi64(U, A, B, I) \ - _mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) - -#define _mm128_shldi_epi64(A, B, I) \ - _mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ - (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \ - (__v8si)(B), \ - (int)(I), \ - (__v8si)(S), \ - (__mmask8)(U)); }) - -#define _mm256_maskz_shldi_epi32(U, A, B, I) \ - _mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) - -#define _mm256_shldi_epi32(A, B, I) \ - _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ - (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \ - (__v4si)(B), \ - (int)(I), \ - (__v4si)(S), \ - (__mmask8)(U)); }) - -#define _mm128_maskz_shldi_epi32(U, A, B, I) \ - _mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) - -#define _mm128_shldi_epi32(A, B, I) \ - _mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ - (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \ - (__v16hi)(B), \ - (int)(I), \ - (__v16hi)(S), \ - (__mmask16)(U)); }) - -#define _mm256_maskz_shldi_epi16(U, A, B, I) \ - _mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) - -#define _mm256_shldi_epi16(A, B, I) \ - _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ - (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \ - (__v8hi)(B), \ - (int)(I), \ - (__v8hi)(S), \ - (__mmask8)(U)); }) - -#define _mm128_maskz_shldi_epi16(U, A, B, I) \ - _mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) - -#define _mm128_shldi_epi16(A, B, I) \ - _mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm256_mask_shrdi_epi64(S, U, A, B, I) 
__extension__ ({ \ - (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \ - (__v4di)(B), \ - (int)(I), \ - (__v4di)(S), \ - (__mmask8)(U)); }) - -#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ - _mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) - -#define _mm256_shrdi_epi64(A, B, I) \ - _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ - (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \ - (__v2di)(B), \ - (int)(I), \ - (__v2di)(S), \ - (__mmask8)(U)); }) - -#define _mm128_maskz_shrdi_epi64(U, A, B, I) \ - _mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) - -#define _mm128_shrdi_epi64(A, B, I) \ - _mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ - (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \ - (__v8si)(B), \ - (int)(I), \ - (__v8si)(S), \ - (__mmask8)(U)); }) - -#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ - _mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) - -#define _mm256_shrdi_epi32(A, B, I) \ - _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ - (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \ - (__v4si)(B), \ - (int)(I), \ - (__v4si)(S), \ - (__mmask8)(U)); }) - -#define _mm128_maskz_shrdi_epi32(U, A, B, I) \ - _mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) - -#define _mm128_shrdi_epi32(A, B, I) \ - _mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ - (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \ - (__v16hi)(B), \ - (int)(I), \ - (__v16hi)(S), \ - (__mmask16)(U)); }) - -#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ - _mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) - -#define _mm256_shrdi_epi16(A, B, I) \ - _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) - -#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ - (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \ - (__v8hi)(B), \ - (int)(I), \ - (__v8hi)(S), \ - (__mmask8)(U)); }) - -#define _mm128_maskz_shrdi_epi16(U, A, B, I) \ - _mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) - -#define _mm128_shrdi_epi16(A, B, I) \ - _mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS 
-_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - (__mmask16) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS 
-_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - (__mmask16) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS 
-_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - (__mmask8) -1); -} - - -#undef __DEFAULT_FN_ATTRS - -#endif diff --git a/lib/clang/7.0.0/include/avx512vlvnniintrin.h b/lib/clang/7.0.0/include/avx512vlvnniintrin.h deleted file mode 100644 index 745ae8b..0000000 --- a/lib/clang/7.0.0/include/avx512vlvnniintrin.h +++ /dev/null @@ -1,254 +0,0 @@ -/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== - * - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - *===-----------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error "Never use directly; include instead." -#endif - -#ifndef __AVX512VLVNNIINTRIN_H -#define __AVX512VLVNNIINTRIN_H - -/* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"))) - - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); 
-} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); -} - - -#undef __DEFAULT_FN_ATTRS - -#endif diff --git a/lib/clang/7.0.0/include/sanitizer/hwasan_interface.h b/lib/clang/7.0.0/include/sanitizer/hwasan_interface.h deleted file mode 100644 index 0c306cf..0000000 --- a/lib/clang/7.0.0/include/sanitizer/hwasan_interface.h +++ /dev/null @@ -1,33 +0,0 @@ -//===-- sanitizer/asan_interface.h ------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is a part of HWAddressSanitizer. -// -// Public interface header. -//===----------------------------------------------------------------------===// -#ifndef SANITIZER_HWASAN_INTERFACE_H -#define SANITIZER_HWASAN_INTERFACE_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - // This function may be optionally provided by user and should return - // a string containing HWASan runtime options. See asan_flags.h for details. 
- const char* __hwasan_default_options(void); - - void __hwasan_enable_allocator_tagging(void); - void __hwasan_disable_allocator_tagging(void); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // SANITIZER_HWASAN_INTERFACE_H diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.10.4.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.10.4.a deleted file mode 100644 index 04d0220..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.10.4.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_ios_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_ios_dynamic.dylib deleted file mode 100755 index 2ae9ca7..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_ios_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib deleted file mode 100755 index 17dd994..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib deleted file mode 100755 index 0135d82..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.cc_kext.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.cc_kext.a deleted file mode 100644 index 883ad50..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.cc_kext.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.fuzzer_no_main_osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.fuzzer_no_main_osx.a deleted file mode 100644 index bb65c35..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.fuzzer_no_main_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.fuzzer_osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.fuzzer_osx.a deleted file mode 100644 index 7eb99a8..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.fuzzer_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_ios_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_ios_dynamic.dylib deleted file mode 100755 index a91e648..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_ios_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_iossim_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_iossim_dynamic.dylib deleted file mode 100755 index a600ba5..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_iossim_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_osx_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_osx_dynamic.dylib deleted file mode 100755 index 47e8919..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.lsan_osx_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.osx.a deleted file mode 100644 index 4b629c5..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.profile_iossim.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.profile_iossim.a deleted file mode 100644 index 3254a77..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.profile_iossim.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.profile_osx.a 
b/lib/clang/7.0.0/lib/darwin/libclang_rt.profile_osx.a deleted file mode 100644 index e38bf0c..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.profile_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.safestack_osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.safestack_osx.a deleted file mode 100644 index 9a813bb..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.safestack_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_ios_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_ios_dynamic.dylib deleted file mode 100755 index 991817c..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_ios_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_iossim_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_iossim_dynamic.dylib deleted file mode 100755 index 357668a..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_iossim_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_osx_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_osx_dynamic.dylib deleted file mode 100755 index 9fc6c18..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.tsan_osx_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_ios.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_ios.a deleted file mode 100644 index f149b35..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_ios.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_ios_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_ios_dynamic.dylib deleted file mode 100755 index 6ae2075..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_ios_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_iossim.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_iossim.a deleted file mode 100644 index ea9a11e..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_iossim.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib deleted file mode 100755 index 1f76094..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios.a deleted file mode 100644 index 9d85d24..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim.a deleted file mode 100644 index cf0fe63..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx.a deleted file mode 100644 index 6f99d08..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx_dynamic.dylib deleted file mode 100755 index 7591403..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx_dynamic.dylib and 
/dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_osx.a deleted file mode 100644 index 294a394..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib b/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib deleted file mode 100755 index 8810551..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.xray-basic_osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.xray-basic_osx.a deleted file mode 100644 index 8b0a643..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.xray-basic_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.xray-fdr_osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.xray-fdr_osx.a deleted file mode 100644 index 60c115d..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.xray-fdr_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.xray_osx.a b/lib/clang/7.0.0/lib/darwin/libclang_rt.xray_osx.a deleted file mode 100644 index 36a1c11..0000000 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.xray_osx.a and /dev/null differ diff --git a/lib/clang/7.0.0/include/__clang_cuda_builtin_vars.h b/lib/clang/8.0.0/include/__clang_cuda_builtin_vars.h similarity index 100% rename from lib/clang/7.0.0/include/__clang_cuda_builtin_vars.h rename to lib/clang/8.0.0/include/__clang_cuda_builtin_vars.h diff --git a/lib/clang/7.0.0/include/__clang_cuda_cmath.h b/lib/clang/8.0.0/include/__clang_cuda_cmath.h similarity index 100% rename from lib/clang/7.0.0/include/__clang_cuda_cmath.h rename to lib/clang/8.0.0/include/__clang_cuda_cmath.h diff --git a/lib/clang/7.0.0/include/__clang_cuda_complex_builtins.h b/lib/clang/8.0.0/include/__clang_cuda_complex_builtins.h similarity index 100% rename from lib/clang/7.0.0/include/__clang_cuda_complex_builtins.h rename to lib/clang/8.0.0/include/__clang_cuda_complex_builtins.h diff --git a/lib/clang/7.0.0/include/__clang_cuda_device_functions.h b/lib/clang/8.0.0/include/__clang_cuda_device_functions.h similarity index 79% rename from lib/clang/7.0.0/include/__clang_cuda_device_functions.h rename to lib/clang/8.0.0/include/__clang_cuda_device_functions.h index c4c2176..67bbc68 100644 --- a/lib/clang/7.0.0/include/__clang_cuda_device_functions.h +++ b/lib/clang/8.0.0/include/__clang_cuda_device_functions.h @@ -803,6 +803,8 @@ __DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b, unsigned int __c) { return __nv_usad(__a, __b, __c); } + +#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 __DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); } __DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); } __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { @@ -1041,6 +1043,431 @@ __DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { return __nv_vsubus4(__a, __b); } +#else // CUDA_VERSION >= 9020 +// CUDA no longer provides inline assembly (or bitcode) implementations of these +// functions, so we have to reimplement them. The implementation is naive and is +// not optimized for performance. + +// Helper function to convert N-bit boolean subfields into all-0 or all-1. +// E.g.
__bool2mask(0x01000100,8) -> 0xff00ff00 +// __bool2mask(0x00010000,16) -> 0xffff0000 +__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) { + return (__a << shift) - __a; +} +__DEVICE__ unsigned int __vabs2(unsigned int __a) { + unsigned int r; + asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(0), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vabs4(unsigned int __a) { + unsigned int r; + asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(0), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} + +__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vabsss2(unsigned int __a) { + unsigned int r; + asm("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(0), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vabsss4(unsigned int __a) { + unsigned int r; + asm("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(0), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vadd2.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vadd4.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vadd2.u32.u32.u32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vadd4.u32.u32.u32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + 
return r; +} +__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vseteq2(__a, __b), 16); +} +__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vseteq4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetges2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetges4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetgeu2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetgeu4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetgts2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetgts4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetgtu2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetgtu4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetles2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.s32.s32.le %0,%1,%2,%3;" : 
"=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetles4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetleu2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetleu4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetlts2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetlts4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetltu2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetltu4(__a, __b), 8); +} +__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetne2(__a, __b), 16); +} +__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) { + return __bool2mask(__vsetne4(__a, __b), 8); +} + +// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086 +// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) => +// (a + b) / 2 = ((a ^ b) >> 1) + (a & b) +// To operate on multiple sub-elements we need to make sure to mask out bits +// that crossed over into adjacent elements during the shift. +__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) { + return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b); +} +__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) { + return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b); +} + +__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) { + unsigned int r; + if ((__a & 0x8000) && (__b & 0x8000)) { + // Work around a bug in ptxas which produces invalid result if low element + // is negative. 
+ unsigned mask = __vcmpgts2(__a, __b); + r = (__a & mask) | (__b & ~mask); + } else { + asm("vmax2.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + } + return r; +} +__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} + +__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); } + +__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); } +__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vsub2.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vnegss2(unsigned int __a) { + return __vsubss2(0, __a); +} +__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vsub4.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +__DEVICE__ unsigned int __vnegss4(unsigned int __a) { + return __vsubss4(0, __a); +} +__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vsub2.u32.u32.u32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; 
+} +__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { + unsigned int r; + asm("vsub4.u32.u32.u32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); + return r; +} +#endif // CUDA_VERSION >= 9020 __DEVICE__ int abs(int __a) { return __nv_abs(__a); } __DEVICE__ double acos(double __a) { return __nv_acos(__a); } __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); } diff --git a/lib/clang/7.0.0/include/__clang_cuda_intrinsics.h b/lib/clang/8.0.0/include/__clang_cuda_intrinsics.h similarity index 99% rename from lib/clang/7.0.0/include/__clang_cuda_intrinsics.h rename to lib/clang/8.0.0/include/__clang_cuda_intrinsics.h index 1794eb3..3c0cde9 100644 --- a/lib/clang/7.0.0/include/__clang_cuda_intrinsics.h +++ b/lib/clang/8.0.0/include/__clang_cuda_intrinsics.h @@ -277,6 +277,9 @@ inline __device__ long long __ldg(const long long *ptr) { inline __device__ unsigned char __ldg(const unsigned char *ptr) { return __nvvm_ldg_uc(ptr); } +inline __device__ signed char __ldg(const signed char *ptr) { + return __nvvm_ldg_uc((const unsigned char *)ptr); +} inline __device__ unsigned short __ldg(const unsigned short *ptr) { return __nvvm_ldg_us(ptr); } diff --git a/lib/clang/7.0.0/include/__clang_cuda_libdevice_declares.h b/lib/clang/8.0.0/include/__clang_cuda_libdevice_declares.h similarity index 99% rename from lib/clang/7.0.0/include/__clang_cuda_libdevice_declares.h rename to lib/clang/8.0.0/include/__clang_cuda_libdevice_declares.h index 73ea8f8..71df7f8 100644 --- a/lib/clang/7.0.0/include/__clang_cuda_libdevice_declares.h +++ b/lib/clang/8.0.0/include/__clang_cuda_libdevice_declares.h @@ -372,6 +372,7 @@ __device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b); __device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b); __device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b, unsigned int __c); +#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 __device__ int __nv_vabs2(int __a); __device__ int __nv_vabs4(int __a); __device__ int __nv_vabsdiffs2(int __a, int __b); @@ -454,12 +455,12 @@ __device__ int __nv_vsubss2(int __a, int __b); __device__ int __nv_vsubss4(int __a, int __b); __device__ int __nv_vsubus2(int __a, int __b); __device__ int __nv_vsubus4(int __a, int __b); +#endif // CUDA_VERSION __device__ double __nv_y0(double __a); __device__ float __nv_y0f(float __a); __device__ double __nv_y1(double __a); __device__ float __nv_y1f(float __a); __device__ float __nv_ynf(int __a, float __b); __device__ double __nv_yn(int __a, double __b); - } // extern "C" #endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__ diff --git a/lib/clang/7.0.0/include/__clang_cuda_math_forward_declares.h b/lib/clang/8.0.0/include/__clang_cuda_math_forward_declares.h similarity index 100% rename from lib/clang/7.0.0/include/__clang_cuda_math_forward_declares.h rename to lib/clang/8.0.0/include/__clang_cuda_math_forward_declares.h diff --git a/lib/clang/7.0.0/include/__clang_cuda_runtime_wrapper.h b/lib/clang/8.0.0/include/__clang_cuda_runtime_wrapper.h similarity index 95% rename from lib/clang/7.0.0/include/__clang_cuda_runtime_wrapper.h rename to lib/clang/8.0.0/include/__clang_cuda_runtime_wrapper.h index a5eb4bd..f05c045 100644 --- a/lib/clang/7.0.0/include/__clang_cuda_runtime_wrapper.h +++ b/lib/clang/8.0.0/include/__clang_cuda_runtime_wrapper.h @@ -62,10 +62,15 @@ #include "cuda.h" #if !defined(CUDA_VERSION) #error "cuda.h did not define CUDA_VERSION" -#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9010 +#elif CUDA_VERSION < 7000 || CUDA_VERSION > 
10000 #error "Unsupported CUDA version!" #endif +#pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") +#if CUDA_VERSION >= 10000 +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#endif + // Make largest subset of device functions available during host // compilation -- SM_35 for the time being. #ifndef __CUDA_ARCH__ @@ -100,11 +105,17 @@ #include "host_config.h" #include "host_defines.h" +// Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in +// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the +// functional equivalent of what we need. +#pragma push_macro("nv_weak") +#define nv_weak weak #undef __CUDABE__ #undef __CUDA_LIBDEVICE__ #define __CUDACC__ #include "cuda_runtime.h" +#pragma pop_macro("nv_weak") #undef __CUDACC__ #define __CUDABE__ @@ -199,6 +210,11 @@ inline __host__ double __signbitd(double x) { #endif #if CUDA_VERSION >= 9000 +// CUDA-9.2 needs host-side memcpy for some host functions in +// device_functions.hpp +#if CUDA_VERSION >= 9020 +#include +#endif #include "crt/math_functions.hpp" #else #include "math_functions.hpp" @@ -408,6 +424,7 @@ __device__ inline __cuda_builtin_gridDim_t::operator dim3() const { #pragma pop_macro("dim3") #pragma pop_macro("uint3") #pragma pop_macro("__USE_FAST_MATH__") +#pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") #endif // __CUDA__ #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ diff --git a/lib/clang/7.0.0/include/__stddef_max_align_t.h b/lib/clang/8.0.0/include/__stddef_max_align_t.h similarity index 100% rename from lib/clang/7.0.0/include/__stddef_max_align_t.h rename to lib/clang/8.0.0/include/__stddef_max_align_t.h diff --git a/lib/clang/7.0.0/include/__wmmintrin_aes.h b/lib/clang/8.0.0/include/__wmmintrin_aes.h similarity index 89% rename from lib/clang/7.0.0/include/__wmmintrin_aes.h rename to lib/clang/8.0.0/include/__wmmintrin_aes.h index 570776d..70c355e 100644 --- a/lib/clang/7.0.0/include/__wmmintrin_aes.h +++ b/lib/clang/8.0.0/include/__wmmintrin_aes.h @@ -20,15 +20,18 @@ * *===-----------------------------------------------------------------------=== */ -#ifndef _WMMINTRIN_AES_H -#define _WMMINTRIN_AES_H -#include +#ifndef __WMMINTRIN_H +#error "Never use <__wmmintrin_aes.h> directly; include instead." +#endif + +#ifndef __WMMINTRIN_AES_H +#define __WMMINTRIN_AES_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128))) -/// \brief Performs a single round of AES encryption using the Equivalent +/// Performs a single round of AES encryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. @@ -48,7 +51,7 @@ _mm_aesenc_si128(__m128i __V, __m128i __R) return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R); } -/// \brief Performs the final round of AES encryption using the Equivalent +/// Performs the final round of AES encryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. 
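With the new `__WMMINTRIN_H` guard above, this AES header can no longer be included directly; user code reaches these intrinsics through `wmmintrin.h`. A minimal sketch of one encryption round follows; the wrapper name and the `-maes` build flag are illustrative, not part of the diff:

```c
/* Sketch only: clang -maes -c aes_round.c */
#include <wmmintrin.h>

/* _mm_aesenc_si128 performs one AES round: ShiftRows, SubBytes and
 * MixColumns on `state`, followed by an XOR with `round_key`. */
static __m128i aes_round(__m128i state, __m128i round_key) {
  return _mm_aesenc_si128(state, round_key);
}
```
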
@@ -68,7 +71,7 @@ _mm_aesenclast_si128(__m128i __V, __m128i __R) return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R); } -/// \brief Performs a single round of AES decryption using the Equivalent +/// Performs a single round of AES decryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. @@ -88,7 +91,7 @@ _mm_aesdec_si128(__m128i __V, __m128i __R) return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R); } -/// \brief Performs the final round of AES decryption using the Equivalent +/// Performs the final round of AES decryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. @@ -108,7 +111,7 @@ _mm_aesdeclast_si128(__m128i __V, __m128i __R) return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R); } -/// \brief Applies the AES InvMixColumns() transformation to an expanded key +/// Applies the AES InvMixColumns() transformation to an expanded key /// contained in the source operand, and writes the result to the /// destination. /// @@ -125,7 +128,7 @@ _mm_aesimc_si128(__m128i __V) return (__m128i)__builtin_ia32_aesimc128((__v2di)__V); } -/// \brief Generates a round key for AES encryption, operating on 128-bit data +/// Generates a round key for AES encryption, operating on 128-bit data /// specified in the first source operand and using an 8-bit round constant /// specified by the second source operand, and writes the result to the /// destination. @@ -148,4 +151,4 @@ _mm_aesimc_si128(__m128i __V) #undef __DEFAULT_FN_ATTRS -#endif /* _WMMINTRIN_AES_H */ +#endif /* __WMMINTRIN_AES_H */ diff --git a/lib/clang/7.0.0/include/__wmmintrin_pclmul.h b/lib/clang/8.0.0/include/__wmmintrin_pclmul.h similarity index 84% rename from lib/clang/7.0.0/include/__wmmintrin_pclmul.h rename to lib/clang/8.0.0/include/__wmmintrin_pclmul.h index e9c6a9f..e0f9287 100644 --- a/lib/clang/7.0.0/include/__wmmintrin_pclmul.h +++ b/lib/clang/8.0.0/include/__wmmintrin_pclmul.h @@ -20,10 +20,15 @@ * *===-----------------------------------------------------------------------=== */ -#ifndef _WMMINTRIN_PCLMUL_H -#define _WMMINTRIN_PCLMUL_H -/// \brief Multiplies two 64-bit integer values, which are selected from source +#ifndef __WMMINTRIN_H +#error "Never use <__wmmintrin_pclmul.h> directly; include instead." +#endif + +#ifndef __WMMINTRIN_PCLMUL_H +#define __WMMINTRIN_PCLMUL_H + +/// Multiplies two 64-bit integer values, which are selected from source /// operands using the immediate-value operand. The multiplication is a /// carry-less multiplication, and the 128-bit integer product is stored in /// the destination. @@ -50,8 +55,8 @@ /// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used. /// \returns The 128-bit integer vector containing the result of the carry-less /// multiplication of the selected 64-bit values. 
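As documented above, bit 0 of the immediate selects the 64-bit half of the first operand and bit 4 the half of the second. A minimal sketch of driving the macro, assuming the `pclmul` target feature is enabled (the wrapper name is illustrative):

```c
/* Sketch only: clang -mpclmul -c clmul_demo.c */
#include <wmmintrin.h>

/* Carry-less (GF(2)) multiply of the low qwords of x and y:
 * immediate 0x00 = low*low, 0x11 = high*high. */
static __m128i clmul_low(__m128i x, __m128i y) {
  return _mm_clmulepi64_si128(x, y, 0x00);
}
```
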
-#define _mm_clmulepi64_si128(__X, __Y, __I) \ - ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \ - (__v2di)(__m128i)(__Y), (char)(__I))) +#define _mm_clmulepi64_si128(X, Y, I) \ + ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \ + (__v2di)(__m128i)(Y), (char)(I))) -#endif /* _WMMINTRIN_PCLMUL_H */ +#endif /* __WMMINTRIN_PCLMUL_H */ diff --git a/lib/clang/7.0.0/include/adxintrin.h b/lib/clang/8.0.0/include/adxintrin.h similarity index 100% rename from lib/clang/7.0.0/include/adxintrin.h rename to lib/clang/8.0.0/include/adxintrin.h diff --git a/lib/clang/7.0.0/include/altivec.h b/lib/clang/8.0.0/include/altivec.h similarity index 100% rename from lib/clang/7.0.0/include/altivec.h rename to lib/clang/8.0.0/include/altivec.h diff --git a/lib/clang/7.0.0/include/ammintrin.h b/lib/clang/8.0.0/include/ammintrin.h similarity index 94% rename from lib/clang/7.0.0/include/ammintrin.h rename to lib/clang/8.0.0/include/ammintrin.h index 2843a7a..680b446 100644 --- a/lib/clang/7.0.0/include/ammintrin.h +++ b/lib/clang/8.0.0/include/ammintrin.h @@ -27,9 +27,9 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128))) -/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit +/// Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index \a idx and of the length \a len. /// /// \headerfile @@ -57,7 +57,7 @@ ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \ (char)(len), (char)(idx))) -/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit +/// Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index and of the length specified by /// \a __y. /// @@ -82,7 +82,7 @@ _mm_extract_si64(__m128i __x, __m128i __y) return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y); } -/// \brief Inserts bits of a specified length from the source integer vector +/// Inserts bits of a specified length from the source integer vector /// \a y into the lower 64 bits of the destination integer vector \a x at /// the index \a idx and of the length \a len. /// @@ -120,7 +120,7 @@ _mm_extract_si64(__m128i __x, __m128i __y) (__v2di)(__m128i)(y), \ (char)(len), (char)(idx))) -/// \brief Inserts bits of a specified length from the source integer vector +/// Inserts bits of a specified length from the source integer vector /// \a __y into the lower 64 bits of the destination integer vector \a __x /// at the index and of the length specified by \a __y. /// @@ -152,7 +152,7 @@ _mm_insert_si64(__m128i __x, __m128i __y) return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y); } -/// \brief Stores a 64-bit double-precision value in a 64-bit memory location. +/// Stores a 64-bit double-precision value in a 64-bit memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// @@ -170,7 +170,7 @@ _mm_stream_sd(double *__p, __m128d __a) __builtin_ia32_movntsd(__p, (__v2df)__a); } -/// \brief Stores a 32-bit single-precision floating-point value in a 32-bit +/// Stores a 32-bit single-precision floating-point value in a 32-bit /// memory location. To minimize caching, the data is flagged as /// non-temporal (unlikely to be used again soon). 
/// diff --git a/lib/clang/7.0.0/include/arm64intr.h b/lib/clang/8.0.0/include/arm64intr.h similarity index 100% rename from lib/clang/7.0.0/include/arm64intr.h rename to lib/clang/8.0.0/include/arm64intr.h diff --git a/lib/clang/7.0.0/include/arm_acle.h b/lib/clang/8.0.0/include/arm_acle.h similarity index 100% rename from lib/clang/7.0.0/include/arm_acle.h rename to lib/clang/8.0.0/include/arm_acle.h diff --git a/lib/clang/7.0.0/include/arm_fp16.h b/lib/clang/8.0.0/include/arm_fp16.h similarity index 99% rename from lib/clang/7.0.0/include/arm_fp16.h rename to lib/clang/8.0.0/include/arm_fp16.h index 45ff14f..de54465 100644 --- a/lib/clang/7.0.0/include/arm_fp16.h +++ b/lib/clang/8.0.0/include/arm_fp16.h @@ -27,7 +27,7 @@ #include typedef __fp16 float16_t; -#define __ai static inline __attribute__((__always_inline__, __nodebug__)) +#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ diff --git a/lib/clang/7.0.0/include/arm_neon.h b/lib/clang/8.0.0/include/arm_neon.h similarity index 99% rename from lib/clang/7.0.0/include/arm_neon.h rename to lib/clang/8.0.0/include/arm_neon.h index 8b7c7a8..c51a630 100644 --- a/lib/clang/7.0.0/include/arm_neon.h +++ b/lib/clang/8.0.0/include/arm_neon.h @@ -427,7 +427,7 @@ typedef struct poly64x2x4_t { #endif -#define __ai static inline __attribute__((__always_inline__, __nodebug__)) +#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) { @@ -7592,21 +7592,6 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld1q_f16(__p0) __extension__ ({ \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \ - __ret; \ -}) -#else -#define vld1q_f16(__p0) __extension__ ({ \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ @@ -7741,21 +7726,6 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld1_f16(__p0) __extension__ ({ \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \ - __ret; \ -}) -#else -#define vld1_f16(__p0) __extension__ ({ \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ @@ -7950,21 +7920,6 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld1q_dup_f16(__p0) __extension__ ({ \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \ - __ret; \ -}) -#else -#define vld1q_dup_f16(__p0) __extension__ ({ \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ @@ -8099,21 +8054,6 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld1_dup_f16(__p0) __extension__ ({ \ - 
float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \ - __ret; \ -}) -#else -#define vld1_dup_f16(__p0) __extension__ ({ \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_dup_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ @@ -8338,24 +8278,6 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8_t __s1 = __p1; \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \ - __ret; \ -}) -#else -#define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8_t __s1 = __p1; \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s1 = __p1; \ @@ -8516,24 +8438,6 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4_t __s1 = __p1; \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \ - __ret; \ -}) -#else -#define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4_t __s1 = __p1; \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __s1 = __p1; \ @@ -8587,15 +8491,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_p8(__p0) __extension__ ({ \ +#define vld1_p8_x2(__p0) __extension__ ({ \ poly8x8x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 4); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 4); \ __ret; \ }) #else -#define vld2_p8(__p0) __extension__ ({ \ +#define vld1_p8_x2(__p0) __extension__ ({ \ poly8x8x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 4); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8604,15 +8508,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_p16(__p0) __extension__ ({ \ +#define vld1_p16_x2(__p0) __extension__ ({ \ poly16x4x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 5); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 5); \ __ret; \ }) #else -#define vld2_p16(__p0) __extension__ ({ \ +#define vld1_p16_x2(__p0) __extension__ ({ \ poly16x4x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 5); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -8621,15 +8525,15 @@ __ai int16x4_t 
vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_p8(__p0) __extension__ ({ \ +#define vld1q_p8_x2(__p0) __extension__ ({ \ poly8x16x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 36); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 36); \ __ret; \ }) #else -#define vld2q_p8(__p0) __extension__ ({ \ +#define vld1q_p8_x2(__p0) __extension__ ({ \ poly8x16x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 36); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8638,15 +8542,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_p16(__p0) __extension__ ({ \ +#define vld1q_p16_x2(__p0) __extension__ ({ \ poly16x8x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 37); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 37); \ __ret; \ }) #else -#define vld2q_p16(__p0) __extension__ ({ \ +#define vld1q_p16_x2(__p0) __extension__ ({ \ poly16x8x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 37); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8655,15 +8559,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_u8(__p0) __extension__ ({ \ +#define vld1q_u8_x2(__p0) __extension__ ({ \ uint8x16x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 48); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 48); \ __ret; \ }) #else -#define vld2q_u8(__p0) __extension__ ({ \ +#define vld1q_u8_x2(__p0) __extension__ ({ \ uint8x16x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 48); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8672,15 +8576,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_u32(__p0) __extension__ ({ \ +#define vld1q_u32_x2(__p0) __extension__ ({ \ uint32x4x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 50); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 50); \ __ret; \ }) #else -#define vld2q_u32(__p0) __extension__ ({ \ +#define vld1q_u32_x2(__p0) __extension__ ({ \ uint32x4x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 50); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -8689,15 +8593,32 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_u16(__p0) __extension__ ({ \ +#define vld1q_u64_x2(__p0) __extension__ ({ \ + uint64x2x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 51); \ + __ret; \ +}) +#else +#define vld1q_u64_x2(__p0) __extension__ ({ \ + uint64x2x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 51); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1q_u16_x2(__p0) __extension__ ({ \ uint16x8x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 49); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 49); \ __ret; \ }) #else -#define vld2q_u16(__p0) __extension__ ({ \ +#define vld1q_u16_x2(__p0) __extension__ ({ \ uint16x8x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 49); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8706,15 +8627,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_s8(__p0) __extension__ ({ \ +#define vld1q_s8_x2(__p0) __extension__ ({ \ int8x16x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 32); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 32); \ __ret; \ }) #else -#define vld2q_s8(__p0) __extension__ ({ \ +#define vld1q_s8_x2(__p0) __extension__ ({ \ int8x16x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 32); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8723,15 +8644,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_f32(__p0) __extension__ ({ \ +#define vld1q_f32_x2(__p0) __extension__ ({ \ float32x4x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 41); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 41); \ __ret; \ }) #else -#define vld2q_f32(__p0) __extension__ ({ \ +#define vld1q_f32_x2(__p0) __extension__ ({ \ float32x4x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 41); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -8740,49 +8661,49 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_f16(__p0) __extension__ ({ \ - float16x8x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 40); \ +#define vld1q_s32_x2(__p0) __extension__ ({ \ + int32x4x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 34); \ __ret; \ }) #else -#define vld2q_f16(__p0) __extension__ ({ \ - float16x8x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 40); \ +#define vld1q_s32_x2(__p0) __extension__ ({ \ + int32x4x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_s32(__p0) __extension__ ({ \ - int32x4x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 34); \ +#define vld1q_s64_x2(__p0) __extension__ ({ \ + int64x2x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 35); \ __ret; \ }) #else -#define vld2q_s32(__p0) __extension__ ({ \ - int32x4x2_t __ret; \ - 
__builtin_neon_vld2q_v(&__ret, __p0, 34); \ +#define vld1q_s64_x2(__p0) __extension__ ({ \ + int64x2x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_s16(__p0) __extension__ ({ \ +#define vld1q_s16_x2(__p0) __extension__ ({ \ int16x8x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 33); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 33); \ __ret; \ }) #else -#define vld2q_s16(__p0) __extension__ ({ \ +#define vld1q_s16_x2(__p0) __extension__ ({ \ int16x8x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 33); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8791,15 +8712,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_u8(__p0) __extension__ ({ \ +#define vld1_u8_x2(__p0) __extension__ ({ \ uint8x8x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 16); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 16); \ __ret; \ }) #else -#define vld2_u8(__p0) __extension__ ({ \ +#define vld1_u8_x2(__p0) __extension__ ({ \ uint8x8x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 16); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8808,15 +8729,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_u32(__p0) __extension__ ({ \ +#define vld1_u32_x2(__p0) __extension__ ({ \ uint32x2x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 18); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 18); \ __ret; \ }) #else -#define vld2_u32(__p0) __extension__ ({ \ +#define vld1_u32_x2(__p0) __extension__ ({ \ uint32x2x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 18); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -8825,29 +8746,29 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_u64(__p0) __extension__ ({ \ +#define vld1_u64_x2(__p0) __extension__ ({ \ uint64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 19); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 19); \ __ret; \ }) #else -#define vld2_u64(__p0) __extension__ ({ \ +#define vld1_u64_x2(__p0) __extension__ ({ \ uint64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 19); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 19); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_u16(__p0) __extension__ ({ \ +#define vld1_u16_x2(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 17); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 17); \ __ret; \ }) #else -#define vld2_u16(__p0) __extension__ ({ \ +#define vld1_u16_x2(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 17); \ + 
__builtin_neon_vld1_x2_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -8856,15 +8777,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_s8(__p0) __extension__ ({ \ +#define vld1_s8_x2(__p0) __extension__ ({ \ int8x8x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 0); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 0); \ __ret; \ }) #else -#define vld2_s8(__p0) __extension__ ({ \ +#define vld1_s8_x2(__p0) __extension__ ({ \ int8x8x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 0); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -8873,15 +8794,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_f32(__p0) __extension__ ({ \ +#define vld1_f32_x2(__p0) __extension__ ({ \ float32x2x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 9); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 9); \ __ret; \ }) #else -#define vld2_f32(__p0) __extension__ ({ \ +#define vld1_f32_x2(__p0) __extension__ ({ \ float32x2x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 9); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -8890,32 +8811,15 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_f16(__p0) __extension__ ({ \ - float16x4x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 8); \ - __ret; \ -}) -#else -#define vld2_f16(__p0) __extension__ ({ \ - float16x4x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 8); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld2_s32(__p0) __extension__ ({ \ +#define vld1_s32_x2(__p0) __extension__ ({ \ int32x2x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 2); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 2); \ __ret; \ }) #else -#define vld2_s32(__p0) __extension__ ({ \ +#define vld1_s32_x2(__p0) __extension__ ({ \ int32x2x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 2); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -8924,29 +8828,29 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_s64(__p0) __extension__ ({ \ +#define vld1_s64_x2(__p0) __extension__ ({ \ int64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 3); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 3); \ __ret; \ }) #else -#define vld2_s64(__p0) __extension__ ({ \ +#define vld1_s64_x2(__p0) __extension__ ({ \ int64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 3); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_s16(__p0) __extension__ ({ \ +#define vld1_s16_x2(__p0) __extension__ ({ \ int16x4x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 1); \ + __builtin_neon_vld1_x2_v(&__ret, 
__p0, 1); \ __ret; \ }) #else -#define vld2_s16(__p0) __extension__ ({ \ +#define vld1_s16_x2(__p0) __extension__ ({ \ int16x4x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 1); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -8955,2776 +8859,4228 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_p8(__p0) __extension__ ({ \ - poly8x8x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 4); \ +#define vld1_p8_x3(__p0) __extension__ ({ \ + poly8x8x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 4); \ __ret; \ }) #else -#define vld2_dup_p8(__p0) __extension__ ({ \ - poly8x8x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 4); \ +#define vld1_p8_x3(__p0) __extension__ ({ \ + poly8x8x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_p16(__p0) __extension__ ({ \ - poly16x4x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 5); \ +#define vld1_p16_x3(__p0) __extension__ ({ \ + poly16x4x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 5); \ __ret; \ }) #else -#define vld2_dup_p16(__p0) __extension__ ({ \ - poly16x4x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 5); \ +#define vld1_p16_x3(__p0) __extension__ ({ \ + poly16x4x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_u8(__p0) __extension__ ({ \ - uint8x8x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 16); \ +#define vld1q_p8_x3(__p0) __extension__ ({ \ + poly8x16x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 36); \ __ret; \ }) #else -#define vld2_dup_u8(__p0) __extension__ ({ \ - uint8x8x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 16); \ +#define vld1q_p8_x3(__p0) __extension__ ({ \ + poly8x16x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_u32(__p0) __extension__ ({ \ - uint32x2x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 18); \ +#define vld1q_p16_x3(__p0) __extension__ ({ \ + poly16x8x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 37); \ __ret; \ }) #else -#define vld2_dup_u32(__p0) __extension__ ({ \ - uint32x2x2_t __ret; \ - 
__builtin_neon_vld2_dup_v(&__ret, __p0, 18); \ +#define vld1q_p16_x3(__p0) __extension__ ({ \ + poly16x8x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_u64(__p0) __extension__ ({ \ - uint64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 19); \ +#define vld1q_u8_x3(__p0) __extension__ ({ \ + uint8x16x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 48); \ __ret; \ }) #else -#define vld2_dup_u64(__p0) __extension__ ({ \ - uint64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 19); \ +#define vld1q_u8_x3(__p0) __extension__ ({ \ + uint8x16x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 48); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_u16(__p0) __extension__ ({ \ - uint16x4x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 17); \ +#define vld1q_u32_x3(__p0) __extension__ ({ \ + uint32x4x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 50); \ __ret; \ }) #else -#define vld2_dup_u16(__p0) __extension__ ({ \ - uint16x4x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 17); \ +#define vld1q_u32_x3(__p0) __extension__ ({ \ + uint32x4x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_s8(__p0) __extension__ ({ \ - int8x8x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 0); \ +#define vld1q_u64_x3(__p0) __extension__ ({ \ + uint64x2x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 51); \ __ret; \ }) #else -#define vld2_dup_s8(__p0) __extension__ ({ \ - int8x8x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 0); \ +#define vld1q_u64_x3(__p0) __extension__ ({ \ + uint64x2x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_f32(__p0) __extension__ ({ \ - float32x2x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 9); \ +#define vld1q_u16_x3(__p0) __extension__ ({ \ + uint16x8x3_t __ret; \ + 
__builtin_neon_vld1q_x3_v(&__ret, __p0, 49); \ __ret; \ }) #else -#define vld2_dup_f32(__p0) __extension__ ({ \ - float32x2x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 9); \ +#define vld1q_u16_x3(__p0) __extension__ ({ \ + uint16x8x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_f16(__p0) __extension__ ({ \ - float16x4x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \ +#define vld1q_s8_x3(__p0) __extension__ ({ \ + int8x16x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 32); \ __ret; \ }) #else -#define vld2_dup_f16(__p0) __extension__ ({ \ - float16x4x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \ +#define vld1q_s8_x3(__p0) __extension__ ({ \ + int8x16x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_s32(__p0) __extension__ ({ \ - int32x2x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 2); \ +#define vld1q_f32_x3(__p0) __extension__ ({ \ + float32x4x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 41); \ __ret; \ }) #else -#define vld2_dup_s32(__p0) __extension__ ({ \ - int32x2x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 2); \ +#define vld1q_f32_x3(__p0) __extension__ ({ \ + float32x4x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_s64(__p0) __extension__ ({ \ - int64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 3); \ +#define vld1q_s32_x3(__p0) __extension__ ({ \ + int32x4x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 34); \ __ret; \ }) #else -#define vld2_dup_s64(__p0) __extension__ ({ \ - int64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 3); \ +#define vld1q_s32_x3(__p0) __extension__ ({ \ + int32x4x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 34); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], 
__ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_s16(__p0) __extension__ ({ \ - int16x4x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 1); \ +#define vld1q_s64_x3(__p0) __extension__ ({ \ + int64x2x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 35); \ __ret; \ }) #else -#define vld2_dup_s16(__p0) __extension__ ({ \ - int16x4x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 1); \ +#define vld1q_s64_x3(__p0) __extension__ ({ \ + int64x2x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8x2_t __s1 = __p1; \ - poly8x8x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 4); \ +#define vld1q_s16_x3(__p0) __extension__ ({ \ + int16x8x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 33); \ __ret; \ }) #else -#define vld2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8x2_t __s1 = __p1; \ - poly8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 4); \ +#define vld1q_s16_x3(__p0) __extension__ ({ \ + int16x8x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4x2_t __s1 = __p1; \ - poly16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 5); \ - __ret; \ -}) -#else -#define vld2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4x2_t __s1 = __p1; \ - poly16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - poly16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 5); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8x2_t __s1 = __p1; \ - poly16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 37); \ +#define vld1_u8_x3(__p0) __extension__ ({ \ + uint8x8x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 16); \ __ret; \ }) #else -#define vld2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8x2_t __s1 = __p1; 
\ - poly16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 37); \ +#define vld1_u8_x3(__p0) __extension__ ({ \ + uint8x8x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4x2_t __s1 = __p1; \ - uint32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 50); \ +#define vld1_u32_x3(__p0) __extension__ ({ \ + uint32x2x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 18); \ __ret; \ }) #else -#define vld2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4x2_t __s1 = __p1; \ - uint32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - uint32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 50); \ +#define vld1_u32_x3(__p0) __extension__ ({ \ + uint32x2x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8x2_t __s1 = __p1; \ - uint16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 49); \ +#define vld1_u64_x3(__p0) __extension__ ({ \ + uint64x1x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 19); \ __ret; \ }) #else -#define vld2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8x2_t __s1 = __p1; \ - uint16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 49); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vld1_u64_x3(__p0) __extension__ ({ \ + uint64x1x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 19); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x4x2_t __s1 = __p1; \ - float32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 41); \ +#define vld1_u16_x3(__p0) __extension__ ({ \ + uint16x4x3_t __ret; 
\ + __builtin_neon_vld1_x3_v(&__ret, __p0, 17); \ __ret; \ }) #else -#define vld2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x4x2_t __s1 = __p1; \ - float32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - float32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 41); \ +#define vld1_u16_x3(__p0) __extension__ ({ \ + uint16x4x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x2_t __s1 = __p1; \ - float16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 40); \ +#define vld1_s8_x3(__p0) __extension__ ({ \ + int8x8x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 0); \ __ret; \ }) #else -#define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x2_t __s1 = __p1; \ - float16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 40); \ +#define vld1_s8_x3(__p0) __extension__ ({ \ + int8x8x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4x2_t __s1 = __p1; \ - int32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 34); \ - __ret; \ -}) -#else -#define vld2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4x2_t __s1 = __p1; \ - int32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - int32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 34); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8x2_t __s1 = __p1; \ - int16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 33); \ +#define vld1_f32_x3(__p0) __extension__ ({ \ + float32x2x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 9); \ __ret; \ }) #else -#define vld2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8x2_t __s1 = __p1; \ - int16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8x2_t 
__ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 33); \ +#define vld1_f32_x3(__p0) __extension__ ({ \ + float32x2x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8x2_t __s1 = __p1; \ - uint8x8x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 16); \ +#define vld1_s32_x3(__p0) __extension__ ({ \ + int32x2x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 2); \ __ret; \ }) #else -#define vld2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8x2_t __s1 = __p1; \ - uint8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 16); \ +#define vld1_s32_x3(__p0) __extension__ ({ \ + int32x2x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2x2_t __s1 = __p1; \ - uint32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 18); \ +#define vld1_s64_x3(__p0) __extension__ ({ \ + int64x1x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 3); \ __ret; \ }) #else -#define vld2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2x2_t __s1 = __p1; \ - uint32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - uint32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 18); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vld1_s64_x3(__p0) __extension__ ({ \ + int64x1x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4x2_t __s1 = __p1; \ - uint16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 17); \ +#define vld1_s16_x3(__p0) __extension__ ({ \ + int16x4x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 1); \ __ret; \ }) #else -#define vld2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4x2_t __s1 = 
__p1; \ - uint16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - uint16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 17); \ +#define vld1_s16_x3(__p0) __extension__ ({ \ + int16x4x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8x2_t __s1 = __p1; \ - int8x8x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 0); \ +#define vld1_p8_x4(__p0) __extension__ ({ \ + poly8x8x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 4); \ __ret; \ }) #else -#define vld2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8x2_t __s1 = __p1; \ - int8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 0); \ +#define vld1_p8_x4(__p0) __extension__ ({ \ + poly8x8x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x2x2_t __s1 = __p1; \ - float32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 9); \ - __ret; \ -}) -#else -#define vld2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x2x2_t __s1 = __p1; \ - float32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - float32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 9); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x2_t __s1 = __p1; \ - float16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 8); \ +#define vld1_p16_x4(__p0) __extension__ ({ \ + poly16x4x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 5); \ __ret; \ }) #else -#define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x2_t __s1 = __p1; \ - float16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - float16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, 
__rev1.val[0], __rev1.val[1], __p2, 8); \ +#define vld1_p16_x4(__p0) __extension__ ({ \ + poly16x4x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2x2_t __s1 = __p1; \ - int32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 2); \ +#define vld1q_p8_x4(__p0) __extension__ ({ \ + poly8x16x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 36); \ __ret; \ }) #else -#define vld2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2x2_t __s1 = __p1; \ - int32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - int32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 2); \ +#define vld1q_p8_x4(__p0) __extension__ ({ \ + poly8x16x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4x2_t __s1 = __p1; \ - int16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 1); \ +#define vld1q_p16_x4(__p0) __extension__ ({ \ + poly16x8x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 37); \ __ret; \ }) #else -#define vld2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4x2_t __s1 = __p1; \ - int16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - int16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 1); \ +#define vld1q_p16_x4(__p0) __extension__ ({ \ + poly16x8x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_p8(__p0) __extension__ ({ \ - 
poly8x8x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 4); \ +#define vld1q_u8_x4(__p0) __extension__ ({ \ + uint8x16x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 48); \ __ret; \ }) #else -#define vld3_p8(__p0) __extension__ ({ \ - poly8x8x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 4); \ +#define vld1q_u8_x4(__p0) __extension__ ({ \ + uint8x16x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_p16(__p0) __extension__ ({ \ - poly16x4x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 5); \ +#define vld1q_u32_x4(__p0) __extension__ ({ \ + uint32x4x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 50); \ __ret; \ }) #else -#define vld3_p16(__p0) __extension__ ({ \ - poly16x4x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 5); \ +#define vld1q_u32_x4(__p0) __extension__ ({ \ + uint32x4x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_p8(__p0) __extension__ ({ \ - poly8x16x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 36); \ +#define vld1q_u64_x4(__p0) __extension__ ({ \ + uint64x2x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 51); \ __ret; \ }) #else -#define vld3q_p8(__p0) __extension__ ({ \ - poly8x16x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 36); \ +#define vld1q_u64_x4(__p0) __extension__ ({ \ + uint64x2x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_p16(__p0) __extension__ ({ \ - poly16x8x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 37); \ +#define vld1q_u16_x4(__p0) __extension__ ({ \ + uint16x8x4_t __ret; \ + 
__builtin_neon_vld1q_x4_v(&__ret, __p0, 49); \ __ret; \ }) #else -#define vld3q_p16(__p0) __extension__ ({ \ - poly16x8x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 37); \ +#define vld1q_u16_x4(__p0) __extension__ ({ \ + uint16x8x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_u8(__p0) __extension__ ({ \ - uint8x16x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 48); \ +#define vld1q_s8_x4(__p0) __extension__ ({ \ + int8x16x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 32); \ __ret; \ }) #else -#define vld3q_u8(__p0) __extension__ ({ \ - uint8x16x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 48); \ +#define vld1q_s8_x4(__p0) __extension__ ({ \ + int8x16x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_u32(__p0) __extension__ ({ \ - uint32x4x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 50); \ +#define vld1q_f32_x4(__p0) __extension__ ({ \ + float32x4x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 41); \ __ret; \ }) #else -#define vld3q_u32(__p0) __extension__ ({ \ - uint32x4x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 50); \ +#define vld1q_f32_x4(__p0) __extension__ ({ \ + float32x4x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_u16(__p0) __extension__ ({ \ - uint16x8x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 49); \ +#define vld1q_s32_x4(__p0) __extension__ ({ \ + int32x4x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 34); \ __ret; \ }) #else -#define vld3q_u16(__p0) __extension__ ({ \ - uint16x8x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 49); \ +#define vld1q_s32_x4(__p0) __extension__ ({ \ + int32x4x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + 
__ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_s8(__p0) __extension__ ({ \ - int8x16x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 32); \ +#define vld1q_s64_x4(__p0) __extension__ ({ \ + int64x2x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 35); \ __ret; \ }) #else -#define vld3q_s8(__p0) __extension__ ({ \ - int8x16x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 32); \ +#define vld1q_s64_x4(__p0) __extension__ ({ \ + int64x2x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_f32(__p0) __extension__ ({ \ - float32x4x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 41); \ +#define vld1q_s16_x4(__p0) __extension__ ({ \ + int16x8x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 33); \ __ret; \ }) #else -#define vld3q_f32(__p0) __extension__ ({ \ - float32x4x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 41); \ +#define vld1q_s16_x4(__p0) __extension__ ({ \ + int16x8x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_f16(__p0) __extension__ ({ \ - float16x8x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 40); \ +#define vld1_u8_x4(__p0) __extension__ ({ \ + uint8x8x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 16); \ __ret; \ }) #else -#define vld3q_f16(__p0) __extension__ ({ \ - float16x8x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 40); \ +#define vld1_u8_x4(__p0) __extension__ ({ \ + uint8x8x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_s32(__p0) __extension__ ({ \ - 
int32x4x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 34); \ +#define vld1_u32_x4(__p0) __extension__ ({ \ + uint32x2x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 18); \ __ret; \ }) #else -#define vld3q_s32(__p0) __extension__ ({ \ - int32x4x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 34); \ +#define vld1_u32_x4(__p0) __extension__ ({ \ + uint32x2x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1_u64_x4(__p0) __extension__ ({ \ + uint64x1x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 19); \ + __ret; \ +}) +#else +#define vld1_u64_x4(__p0) __extension__ ({ \ + uint64x1x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 19); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1_u16_x4(__p0) __extension__ ({ \ + uint16x4x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 17); \ + __ret; \ +}) +#else +#define vld1_u16_x4(__p0) __extension__ ({ \ + uint16x4x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 17); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_s16(__p0) __extension__ ({ \ - int16x8x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 33); \ +#define vld1_s8_x4(__p0) __extension__ ({ \ + int8x8x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 0); \ __ret; \ }) #else -#define vld3q_s16(__p0) __extension__ ({ \ - int16x8x3_t __ret; \ - __builtin_neon_vld3q_v(&__ret, __p0, 33); \ +#define vld1_s8_x4(__p0) __extension__ ({ \ + int8x8x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_u8(__p0) __extension__ ({ \ - uint8x8x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 16); \ +#define vld1_f32_x4(__p0) __extension__ ({ \ + float32x2x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 9); \ __ret; \ }) #else -#define vld3_u8(__p0) __extension__ ({ \ - uint8x8x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 16); \ +#define vld1_f32_x4(__p0) __extension__ ({ \ + float32x2x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 
5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_u32(__p0) __extension__ ({ \ - uint32x2x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 18); \ +#define vld1_s32_x4(__p0) __extension__ ({ \ + int32x2x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 2); \ __ret; \ }) #else -#define vld3_u32(__p0) __extension__ ({ \ - uint32x2x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 18); \ +#define vld1_s32_x4(__p0) __extension__ ({ \ + int32x2x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_u64(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 19); \ +#define vld1_s64_x4(__p0) __extension__ ({ \ + int64x1x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 3); \ __ret; \ }) #else -#define vld3_u64(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 19); \ +#define vld1_s64_x4(__p0) __extension__ ({ \ + int64x1x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_u16(__p0) __extension__ ({ \ - uint16x4x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 17); \ +#define vld1_s16_x4(__p0) __extension__ ({ \ + int16x4x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 1); \ __ret; \ }) #else -#define vld3_u16(__p0) __extension__ ({ \ - uint16x4x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 17); \ +#define vld1_s16_x4(__p0) __extension__ ({ \ + int16x4x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_s8(__p0) __extension__ ({ \ - int8x8x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 0); \ +#define vld2_p8(__p0) __extension__ ({ \ + poly8x8x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 4); \ __ret; \ }) #else -#define vld3_s8(__p0) __extension__ ({ \ - int8x8x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 0); \ +#define vld2_p8(__p0) __extension__ ({ \ + poly8x8x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_f32(__p0) __extension__ ({ \ - float32x2x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 9); \ +#define vld2_p16(__p0) __extension__ ({ \ + poly16x4x2_t __ret; \ + 
__builtin_neon_vld2_v(&__ret, __p0, 5); \ __ret; \ }) #else -#define vld3_f32(__p0) __extension__ ({ \ - float32x2x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 9); \ +#define vld2_p16(__p0) __extension__ ({ \ + poly16x4x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_f16(__p0) __extension__ ({ \ - float16x4x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 8); \ +#define vld2q_p8(__p0) __extension__ ({ \ + poly8x16x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 36); \ __ret; \ }) #else -#define vld3_f16(__p0) __extension__ ({ \ - float16x4x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 8); \ +#define vld2q_p8(__p0) __extension__ ({ \ + poly8x16x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_s32(__p0) __extension__ ({ \ - int32x2x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 2); \ +#define vld2q_p16(__p0) __extension__ ({ \ + poly16x8x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 37); \ __ret; \ }) #else -#define vld3_s32(__p0) __extension__ ({ \ - int32x2x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 2); \ +#define vld2q_p16(__p0) __extension__ ({ \ + poly16x8x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_s64(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 3); \ +#define vld2q_u8(__p0) __extension__ ({ \ + uint8x16x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 48); \ __ret; \ }) #else -#define vld3_s64(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 3); \ +#define vld2q_u8(__p0) __extension__ ({ \ + uint8x16x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 48); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_s16(__p0) __extension__ ({ \ - int16x4x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 
1); \ +#define vld2q_u32(__p0) __extension__ ({ \ + uint32x4x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 50); \ __ret; \ }) #else -#define vld3_s16(__p0) __extension__ ({ \ - int16x4x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 1); \ +#define vld2q_u32(__p0) __extension__ ({ \ + uint32x4x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_p8(__p0) __extension__ ({ \ - poly8x8x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \ +#define vld2q_u16(__p0) __extension__ ({ \ + uint16x8x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 49); \ __ret; \ }) #else -#define vld3_dup_p8(__p0) __extension__ ({ \ - poly8x8x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \ +#define vld2q_u16(__p0) __extension__ ({ \ + uint16x8x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_p16(__p0) __extension__ ({ \ - poly16x4x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \ +#define vld2q_s8(__p0) __extension__ ({ \ + int8x16x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 32); \ __ret; \ }) #else -#define vld3_dup_p16(__p0) __extension__ ({ \ - poly16x4x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \ +#define vld2q_s8(__p0) __extension__ ({ \ + int8x16x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_u8(__p0) __extension__ ({ \ - uint8x8x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \ +#define vld2q_f32(__p0) __extension__ ({ \ + float32x4x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 41); \ __ret; \ }) #else -#define vld3_dup_u8(__p0) __extension__ ({ \ - uint8x8x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \ +#define vld2q_f32(__p0) __extension__ ({ \ + float32x4x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_u32(__p0) __extension__ ({ \ - uint32x2x3_t __ret; \ - 
__builtin_neon_vld3_dup_v(&__ret, __p0, 18); \ +#define vld2q_s32(__p0) __extension__ ({ \ + int32x4x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 34); \ __ret; \ }) #else -#define vld3_dup_u32(__p0) __extension__ ({ \ - uint32x2x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 18); \ +#define vld2q_s32(__p0) __extension__ ({ \ + int32x4x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_u64(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \ +#define vld2q_s16(__p0) __extension__ ({ \ + int16x8x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 33); \ __ret; \ }) #else -#define vld3_dup_u64(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \ +#define vld2q_s16(__p0) __extension__ ({ \ + int16x8x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 33); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_u16(__p0) __extension__ ({ \ - uint16x4x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \ +#define vld2_u8(__p0) __extension__ ({ \ + uint8x8x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 16); \ __ret; \ }) #else -#define vld3_dup_u16(__p0) __extension__ ({ \ - uint16x4x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \ +#define vld2_u8(__p0) __extension__ ({ \ + uint8x8x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_s8(__p0) __extension__ ({ \ - int8x8x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \ +#define vld2_u32(__p0) __extension__ ({ \ + uint32x2x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 18); \ __ret; \ }) #else -#define vld3_dup_s8(__p0) __extension__ ({ \ - int8x8x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \ +#define vld2_u32(__p0) __extension__ ({ \ + uint32x2x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_f32(__p0) 
__extension__ ({ \ - float32x2x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \ +#define vld2_u64(__p0) __extension__ ({ \ + uint64x1x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 19); \ __ret; \ }) #else -#define vld3_dup_f32(__p0) __extension__ ({ \ - float32x2x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ +#define vld2_u64(__p0) __extension__ ({ \ + uint64x1x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 19); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_f16(__p0) __extension__ ({ \ - float16x4x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \ +#define vld2_u16(__p0) __extension__ ({ \ + uint16x4x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 17); \ __ret; \ }) #else -#define vld3_dup_f16(__p0) __extension__ ({ \ - float16x4x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \ +#define vld2_u16(__p0) __extension__ ({ \ + uint16x4x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_s32(__p0) __extension__ ({ \ - int32x2x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \ +#define vld2_s8(__p0) __extension__ ({ \ + int8x8x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 0); \ __ret; \ }) #else -#define vld3_dup_s32(__p0) __extension__ ({ \ - int32x2x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \ +#define vld2_s8(__p0) __extension__ ({ \ + int8x8x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_s64(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \ +#define vld2_f32(__p0) __extension__ ({ \ + float32x2x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 9); \ __ret; \ }) #else -#define vld3_dup_s64(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \ +#define vld2_f32(__p0) __extension__ ({ \ + float32x2x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 9); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_dup_s16(__p0) __extension__ ({ \ - int16x4x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \ +#define vld2_s32(__p0) __extension__ ({ \ + int32x2x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 2); \ __ret; \ }) #else -#define vld3_dup_s16(__p0) __extension__ ({ \ - int16x4x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \ +#define vld2_s32(__p0) __extension__ ({ \ + 
int32x2x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8x3_t __s1 = __p1; \ - poly8x8x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 4); \ +#define vld2_s64(__p0) __extension__ ({ \ + int64x1x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 3); \ __ret; \ }) #else -#define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8x3_t __s1 = __p1; \ - poly8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 4); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vld2_s64(__p0) __extension__ ({ \ + int64x1x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4x3_t __s1 = __p1; \ - poly16x4x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 5); \ +#define vld2_s16(__p0) __extension__ ({ \ + int16x4x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 1); \ __ret; \ }) #else -#define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4x3_t __s1 = __p1; \ - poly16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - poly16x4x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 5); \ +#define vld2_s16(__p0) __extension__ ({ \ + int16x4x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8x3_t __s1 = __p1; \ - poly16x8x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 37); \ +#define vld2_dup_p8(__p0) __extension__ ({ \ + poly8x8x2_t __ret; \ + __builtin_neon_vld2_dup_v(&__ret, __p0, 4); \ __ret; \ }) #else -#define 
[... the remaining roughly 1,400 lines of this mechanically regenerated arm_neon.h hunk repeat the pattern shown above: the clang 7 definitions of the vld3_lane_*/vld3q_lane_* and vld4_*/vld4q_*/vld4_dup_*/vld4_lane_*/vld4q_lane_* macros at these offsets are removed, and the clang 8 definitions of the vld2_dup_*/vld2q_dup_*, vld2_lane_*/vld2q_lane_*, and vld3_*/vld3q_* macros are added in their place, across the element types (p8/p16, u8–u64, s8–s64, f16/f32). Each macro is emitted twice: the __LITTLE_ENDIAN__ variant calls the corresponding __builtin_neon_* function directly, while the big-endian variant reverses the lanes of every val[i] member with __builtin_shufflevector before the call (for source operands) and after it (for results); one-lane types such as int64x1x2_t and uint64x1x3_t need no reversal, so their two variants are identical ...]
__builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x4x4_t __s1 = __p1; \ - float32x4x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 41); \ +#define vld3_f32(__p0) __extension__ ({ \ + float32x2x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 9); \ __ret; \ }) #else -#define vld4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x4x4_t __s1 = __p1; \ - float32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - float32x4x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 41); \ +#define vld3_f32(__p0) __extension__ ({ \ + float32x2x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x4_t __s1 = __p1; \ - float16x8x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 40); \ +#define vld3_s32(__p0) __extension__ ({ \ + int32x2x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 2); \ __ret; \ }) #else -#define vld4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x4_t __s1 = __p1; \ - float16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 40); \ +#define vld3_s32(__p0) __extension__ ({ \ + int32x2x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -#define vld4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4x4_t __s1 = __p1; \ - int32x4x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 34); \ +#define vld3_s64(__p0) __extension__ ({ \ + int64x1x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 3); \ __ret; \ }) #else -#define vld4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4x4_t __s1 = __p1; \ - int32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - int32x4x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 34); \ +#define vld3_s64(__p0) __extension__ ({ \ + int64x1x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 3); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_s16(__p0) __extension__ ({ \ + int16x4x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 1); \ + __ret; \ +}) +#else +#define vld3_s16(__p0) __extension__ ({ \ + int16x4x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8x4_t __s1 = __p1; \ - int16x8x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 33); \ +#define vld3_dup_p8(__p0) __extension__ ({ \ + poly8x8x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \ __ret; \ }) #else -#define vld4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8x4_t __s1 = __p1; \ - int16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 33); \ +#define vld3_dup_p8(__p0) __extension__ ({ \ + poly8x8x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8x4_t __s1 = __p1; \ - uint8x8x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 16); \ +#define vld3_dup_p16(__p0) 
__extension__ ({ \ + poly16x4x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \ __ret; \ }) #else -#define vld4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8x4_t __s1 = __p1; \ - uint8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 16); \ +#define vld3_dup_p16(__p0) __extension__ ({ \ + poly16x4x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_dup_p8(__p0) __extension__ ({ \ + poly8x16x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 36); \ + __ret; \ +}) +#else +#define vld3q_dup_p8(__p0) __extension__ ({ \ + poly8x16x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 36); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_dup_p16(__p0) __extension__ ({ \ + poly16x8x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 37); \ + __ret; \ +}) +#else +#define vld3q_dup_p16(__p0) __extension__ ({ \ + poly16x8x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2x4_t __s1 = __p1; \ - uint32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 18); \ +#define vld3q_dup_u8(__p0) __extension__ ({ \ + uint8x16x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 48); \ __ret; \ }) #else -#define vld4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2x4_t __s1 = __p1; \ - uint32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - uint32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 
(int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 18); \ +#define vld3q_dup_u8(__p0) __extension__ ({ \ + uint8x16x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4x4_t __s1 = __p1; \ - uint16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 17); \ +#define vld3q_dup_u32(__p0) __extension__ ({ \ + uint32x4x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 50); \ __ret; \ }) #else -#define vld4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4x4_t __s1 = __p1; \ - uint16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - uint16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 17); \ +#define vld3q_dup_u32(__p0) __extension__ ({ \ + uint32x4x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8x4_t __s1 = __p1; \ - int8x8x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 0); \ +#define vld3q_dup_u64(__p0) __extension__ ({ \ + uint64x2x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 51); \ __ret; \ }) #else -#define vld4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8x4_t __s1 = __p1; \ - int8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 0); \ +#define vld3q_dup_u64(__p0) __extension__ 
({ \ + uint64x2x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 51); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_dup_u16(__p0) __extension__ ({ \ + uint16x8x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 49); \ + __ret; \ +}) +#else +#define vld3q_dup_u16(__p0) __extension__ ({ \ + uint16x8x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x2x4_t __s1 = __p1; \ - float32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 9); \ +#define vld3q_dup_s8(__p0) __extension__ ({ \ + int8x16x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 32); \ __ret; \ }) #else -#define vld4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x2x4_t __s1 = __p1; \ - float32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - float32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 9); \ +#define vld3q_dup_s8(__p0) __extension__ ({ \ + int8x16x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x4_t __s1 = __p1; \ - float16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 8); \ +#define vld3q_dup_f32(__p0) __extension__ ({ \ + float32x4x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 41); \ __ret; \ }) #else -#define vld4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x4_t __s1 = __p1; \ - float16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 
3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - float16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 8); \ +#define vld3q_dup_f32(__p0) __extension__ ({ \ + float32x4x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2x4_t __s1 = __p1; \ - int32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 2); \ +#define vld3q_dup_s32(__p0) __extension__ ({ \ + int32x4x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 34); \ __ret; \ }) #else -#define vld4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2x4_t __s1 = __p1; \ - int32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - int32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 2); \ +#define vld3q_dup_s32(__p0) __extension__ ({ \ + int32x4x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 34); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_dup_s64(__p0) __extension__ ({ \ + int64x2x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 35); \ + __ret; \ +}) +#else +#define vld3q_dup_s64(__p0) __extension__ ({ \ + int64x2x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4x4_t __s1 = __p1; \ - int16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 1); \ +#define vld3q_dup_s16(__p0) __extension__ ({ \ + int16x8x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 33); \ __ret; \ }) #else -#define vld4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4x4_t __s1 = __p1; \ - int16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - int16x4x4_t __ret; \ - 
__builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 1); \ +#define vld3q_dup_s16(__p0) __extension__ ({ \ + int16x8x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); - return __ret; -} +#define vld3_dup_u8(__p0) __extension__ ({ \ + uint8x8x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \ + __ret; \ +}) #else -__ai uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vld3_dup_u8(__p0) __extension__ ({ \ + uint8x8x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); - return __ret; -} +#define vld3_dup_u32(__p0) __extension__ ({ \ + uint32x2x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 18); \ + __ret; \ +}) #else -__ai uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld3_dup_u32(__p0) __extension__ ({ \ + uint32x2x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 18); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); - return __ret; -} 
+#define vld3_dup_u64(__p0) __extension__ ({ \ + uint64x1x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \ + __ret; \ +}) #else -__ai uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vld3_dup_u64(__p0) __extension__ ({ \ + uint64x1x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); - return __ret; -} +#define vld3_dup_u16(__p0) __extension__ ({ \ + uint16x4x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \ + __ret; \ +}) #else -__ai int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vld3_dup_u16(__p0) __extension__ ({ \ + uint16x4x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); - return __ret; -} +#define vld3_dup_s8(__p0) __extension__ ({ \ + int8x8x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \ + __ret; \ +}) #else -__ai float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld3_dup_s8(__p0) __extension__ ({ \ + int8x8x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); - return __ret; -} +#define vld3_dup_f32(__p0) __extension__ ({ \ + float32x2x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \ + __ret; \ +}) #else 
-__ai int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld3_dup_f32(__p0) __extension__ ({ \ + float32x2x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); - return __ret; -} +#define vld3_dup_s32(__p0) __extension__ ({ \ + int32x2x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \ + __ret; \ +}) #else -__ai int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vld3_dup_s32(__p0) __extension__ ({ \ + int32x2x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 16); - return __ret; -} -#else -__ai uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { +#define vld3_dup_s64(__p0) __extension__ ({ \ + int64x1x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \ + __ret; \ +}) +#else +#define vld3_dup_s64(__p0) __extension__ ({ \ + int64x1x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_dup_s16(__p0) __extension__ ({ \ + int16x4x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \ + __ret; \ +}) +#else +#define vld3_dup_s16(__p0) __extension__ ({ \ + int16x4x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x8x3_t __s1 = __p1; \ + poly8x8x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 4); \ + __ret; \ +}) +#else +#define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x8x3_t __s1 = __p1; \ + poly8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + 
__rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 4); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x4x3_t __s1 = __p1; \ + poly16x4x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 5); \ + __ret; \ +}) +#else +#define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x4x3_t __s1 = __p1; \ + poly16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + poly16x4x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 5); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8x3_t __s1 = __p1; \ + poly16x8x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 37); \ + __ret; \ +}) +#else +#define vld3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8x3_t __s1 = __p1; \ + poly16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 37); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4x3_t __s1 = __p1; \ + uint32x4x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 50); \ + __ret; \ +}) +#else +#define vld3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4x3_t __s1 = __p1; \ + uint32x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 
3, 2, 1, 0); \ + uint32x4x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 50); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8x3_t __s1 = __p1; \ + uint16x8x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 49); \ + __ret; \ +}) +#else +#define vld3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8x3_t __s1 = __p1; \ + uint16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 49); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x4x3_t __s1 = __p1; \ + float32x4x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 41); \ + __ret; \ +}) +#else +#define vld3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x4x3_t __s1 = __p1; \ + float32x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + float32x4x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 41); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4x3_t __s1 = __p1; \ + int32x4x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 34); \ + __ret; \ +}) +#else +#define vld3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4x3_t __s1 = __p1; \ + int32x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + int32x4x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 34); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], 
__ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8x3_t __s1 = __p1; \ + int16x8x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 33); \ + __ret; \ +}) +#else +#define vld3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8x3_t __s1 = __p1; \ + int16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 33); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x8x3_t __s1 = __p1; \ + uint8x8x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 16); \ + __ret; \ +}) +#else +#define vld3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x8x3_t __s1 = __p1; \ + uint8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 16); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x2x3_t __s1 = __p1; \ + uint32x2x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 18); \ + __ret; \ +}) +#else +#define vld3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x2x3_t __s1 = __p1; \ + uint32x2x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + uint32x2x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 18); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4x3_t __s1 = __p1; \ 
+ uint16x4x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 17); \ + __ret; \ +}) +#else +#define vld3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4x3_t __s1 = __p1; \ + uint16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + uint16x4x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 17); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x8x3_t __s1 = __p1; \ + int8x8x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 0); \ + __ret; \ +}) +#else +#define vld3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x8x3_t __s1 = __p1; \ + int8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 0); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x2x3_t __s1 = __p1; \ + float32x2x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 9); \ + __ret; \ +}) +#else +#define vld3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x2x3_t __s1 = __p1; \ + float32x2x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + float32x2x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 9); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2x3_t __s1 = __p1; \ + int32x2x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 2); \ + __ret; \ +}) +#else +#define vld3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2x3_t __s1 = __p1; \ + int32x2x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ 
+ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + int32x2x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 2); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4x3_t __s1 = __p1; \ + int16x4x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 1); \ + __ret; \ +}) +#else +#define vld3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4x3_t __s1 = __p1; \ + int16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + int16x4x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 1); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_p8(__p0) __extension__ ({ \ + poly8x8x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 4); \ + __ret; \ +}) +#else +#define vld4_p8(__p0) __extension__ ({ \ + poly8x8x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 4); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_p16(__p0) __extension__ ({ \ + poly16x4x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 5); \ + __ret; \ +}) +#else +#define vld4_p16(__p0) __extension__ ({ \ + poly16x4x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 5); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_p8(__p0) __extension__ ({ \ + poly8x16x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 36); \ + __ret; \ +}) +#else +#define vld4q_p8(__p0) __extension__ ({ \ + poly8x16x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 36); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = 
__builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_p16(__p0) __extension__ ({ \ + poly16x8x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 37); \ + __ret; \ +}) +#else +#define vld4q_p16(__p0) __extension__ ({ \ + poly16x8x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 37); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_u8(__p0) __extension__ ({ \ + uint8x16x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 48); \ + __ret; \ +}) +#else +#define vld4q_u8(__p0) __extension__ ({ \ + uint8x16x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 48); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_u32(__p0) __extension__ ({ \ + uint32x4x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 50); \ + __ret; \ +}) +#else +#define vld4q_u32(__p0) __extension__ ({ \ + uint32x4x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 50); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_u16(__p0) __extension__ ({ \ + uint16x8x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 49); \ + __ret; \ +}) +#else +#define vld4q_u16(__p0) __extension__ ({ \ + uint16x8x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 49); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_s8(__p0) __extension__ ({ \ + int8x16x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 32); \ + __ret; \ +}) +#else +#define vld4q_s8(__p0) __extension__ ({ \ + int8x16x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 32); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], 
__ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_f32(__p0) __extension__ ({ \ + float32x4x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 41); \ + __ret; \ +}) +#else +#define vld4q_f32(__p0) __extension__ ({ \ + float32x4x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 41); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_s32(__p0) __extension__ ({ \ + int32x4x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 34); \ + __ret; \ +}) +#else +#define vld4q_s32(__p0) __extension__ ({ \ + int32x4x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 34); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_s16(__p0) __extension__ ({ \ + int16x8x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 33); \ + __ret; \ +}) +#else +#define vld4q_s16(__p0) __extension__ ({ \ + int16x8x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 33); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_u8(__p0) __extension__ ({ \ + uint8x8x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 16); \ + __ret; \ +}) +#else +#define vld4_u8(__p0) __extension__ ({ \ + uint8x8x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 16); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_u32(__p0) __extension__ ({ \ + uint32x2x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 18); \ + __ret; \ +}) +#else +#define vld4_u32(__p0) __extension__ ({ \ + uint32x2x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 18); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_u64(__p0) __extension__ ({ \ 
+ uint64x1x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 19); \ + __ret; \ +}) +#else +#define vld4_u64(__p0) __extension__ ({ \ + uint64x1x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 19); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_u16(__p0) __extension__ ({ \ + uint16x4x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 17); \ + __ret; \ +}) +#else +#define vld4_u16(__p0) __extension__ ({ \ + uint16x4x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 17); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_s8(__p0) __extension__ ({ \ + int8x8x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 0); \ + __ret; \ +}) +#else +#define vld4_s8(__p0) __extension__ ({ \ + int8x8x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 0); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_f32(__p0) __extension__ ({ \ + float32x2x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 9); \ + __ret; \ +}) +#else +#define vld4_f32(__p0) __extension__ ({ \ + float32x2x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 9); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_s32(__p0) __extension__ ({ \ + int32x2x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 2); \ + __ret; \ +}) +#else +#define vld4_s32(__p0) __extension__ ({ \ + int32x2x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 2); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_s64(__p0) __extension__ ({ \ + int64x1x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 3); \ + __ret; \ +}) +#else +#define vld4_s64(__p0) __extension__ ({ \ + int64x1x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 3); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_s16(__p0) __extension__ ({ \ + int16x4x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 1); \ + __ret; \ +}) +#else +#define vld4_s16(__p0) __extension__ ({ \ + int16x4x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 1); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], 
__ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_p8(__p0) __extension__ ({ \ + poly8x8x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 4); \ + __ret; \ +}) +#else +#define vld4_dup_p8(__p0) __extension__ ({ \ + poly8x8x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 4); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_p16(__p0) __extension__ ({ \ + poly16x4x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 5); \ + __ret; \ +}) +#else +#define vld4_dup_p16(__p0) __extension__ ({ \ + poly16x4x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 5); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_p8(__p0) __extension__ ({ \ + poly8x16x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 36); \ + __ret; \ +}) +#else +#define vld4q_dup_p8(__p0) __extension__ ({ \ + poly8x16x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 36); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_p16(__p0) __extension__ ({ \ + poly16x8x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 37); \ + __ret; \ +}) +#else +#define vld4q_dup_p16(__p0) __extension__ ({ \ + poly16x8x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 37); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_u8(__p0) __extension__ ({ \ + uint8x16x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 48); \ + __ret; \ +}) +#else +#define vld4q_dup_u8(__p0) __extension__ ({ \ + uint8x16x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 48); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 
0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_u32(__p0) __extension__ ({ \ + uint32x4x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 50); \ + __ret; \ +}) +#else +#define vld4q_dup_u32(__p0) __extension__ ({ \ + uint32x4x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 50); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_u64(__p0) __extension__ ({ \ + uint64x2x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 51); \ + __ret; \ +}) +#else +#define vld4q_dup_u64(__p0) __extension__ ({ \ + uint64x2x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 51); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_u16(__p0) __extension__ ({ \ + uint16x8x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 49); \ + __ret; \ +}) +#else +#define vld4q_dup_u16(__p0) __extension__ ({ \ + uint16x8x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 49); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_s8(__p0) __extension__ ({ \ + int8x16x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 32); \ + __ret; \ +}) +#else +#define vld4q_dup_s8(__p0) __extension__ ({ \ + int8x16x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 32); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_f32(__p0) __extension__ ({ \ + float32x4x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 41); \ + __ret; \ +}) +#else +#define vld4q_dup_f32(__p0) __extension__ ({ \ + float32x4x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 41); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 
2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_s32(__p0) __extension__ ({ \ + int32x4x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 34); \ + __ret; \ +}) +#else +#define vld4q_dup_s32(__p0) __extension__ ({ \ + int32x4x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 34); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_s64(__p0) __extension__ ({ \ + int64x2x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 35); \ + __ret; \ +}) +#else +#define vld4q_dup_s64(__p0) __extension__ ({ \ + int64x2x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 35); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_s16(__p0) __extension__ ({ \ + int16x8x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 33); \ + __ret; \ +}) +#else +#define vld4q_dup_s16(__p0) __extension__ ({ \ + int16x8x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 33); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_u8(__p0) __extension__ ({ \ + uint8x8x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 16); \ + __ret; \ +}) +#else +#define vld4_dup_u8(__p0) __extension__ ({ \ + uint8x8x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 16); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_u32(__p0) __extension__ ({ \ + uint32x2x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 18); \ + __ret; \ +}) +#else +#define vld4_dup_u32(__p0) __extension__ ({ \ + uint32x2x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 18); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define 
vld4_dup_u64(__p0) __extension__ ({ \ + uint64x1x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 19); \ + __ret; \ +}) +#else +#define vld4_dup_u64(__p0) __extension__ ({ \ + uint64x1x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 19); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_u16(__p0) __extension__ ({ \ + uint16x4x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 17); \ + __ret; \ +}) +#else +#define vld4_dup_u16(__p0) __extension__ ({ \ + uint16x4x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 17); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_s8(__p0) __extension__ ({ \ + int8x8x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 0); \ + __ret; \ +}) +#else +#define vld4_dup_s8(__p0) __extension__ ({ \ + int8x8x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 0); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_f32(__p0) __extension__ ({ \ + float32x2x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 9); \ + __ret; \ +}) +#else +#define vld4_dup_f32(__p0) __extension__ ({ \ + float32x2x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 9); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_s32(__p0) __extension__ ({ \ + int32x2x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 2); \ + __ret; \ +}) +#else +#define vld4_dup_s32(__p0) __extension__ ({ \ + int32x2x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 2); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_s64(__p0) __extension__ ({ \ + int64x1x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 3); \ + __ret; \ +}) +#else +#define vld4_dup_s64(__p0) __extension__ ({ \ + int64x1x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 3); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_s16(__p0) __extension__ ({ \ + int16x4x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 1); \ + __ret; \ +}) +#else +#define vld4_dup_s16(__p0) __extension__ ({ \ + int16x4x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 1); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 
0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x8x4_t __s1 = __p1; \ + poly8x8x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 4); \ + __ret; \ +}) +#else +#define vld4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x8x4_t __s1 = __p1; \ + poly8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 4); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x4x4_t __s1 = __p1; \ + poly16x4x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 5); \ + __ret; \ +}) +#else +#define vld4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x4x4_t __s1 = __p1; \ + poly16x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + poly16x4x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 5); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8x4_t __s1 = __p1; \ + poly16x8x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 37); \ + __ret; \ +}) +#else +#define vld4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8x4_t __s1 = __p1; \ + poly16x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], 
__s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 37); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4x4_t __s1 = __p1; \ + uint32x4x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 50); \ + __ret; \ +}) +#else +#define vld4q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4x4_t __s1 = __p1; \ + uint32x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + uint32x4x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 50); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8x4_t __s1 = __p1; \ + uint16x8x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 49); \ + __ret; \ +}) +#else +#define vld4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8x4_t __s1 = __p1; \ + uint16x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 49); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + 
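All of the `vld4*` and `vmax*`/`vqrshl*` definitions in this hunk follow one pattern: on little-endian targets the intrinsic forwards directly to the compiler builtin, while on big-endian targets every input vector is lane-reversed with `__builtin_shufflevector` before the call and the result is reversed back afterwards, so lane numbering stays consistent with memory order either way. As a minimal usage sketch (not part of the diff; it assumes an ARM target built with NEON support, and the function and variable names are hypothetical):

```c
#include <arm_neon.h>
#include <stdint.h>

/* De-interleave 8 RGBA pixels with vld4_u8 (defined earlier in this hunk),
 * then take the lane-wise maximum of the red and green channels with
 * vmax_u8 (defined further down), and store the 8 per-pixel maxima. */
void max_red_green(const uint8_t rgba[32], uint8_t out[8]) {
    uint8x8x4_t px = vld4_u8(rgba);  /* px.val[0]=R, val[1]=G, val[2]=B, val[3]=A */
    uint8x8_t m = vmax_u8(px.val[0], px.val[1]);
    vst1_u8(out, m);
}
```

The double reversal in the big-endian variants is why the generated header looks so repetitive: the builtins operate on registers in a fixed lane order, so the header, rather than user code, absorbs the endianness difference.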
+#ifdef __LITTLE_ENDIAN__ +#define vld4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x4x4_t __s1 = __p1; \ + float32x4x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 41); \ + __ret; \ +}) +#else +#define vld4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x4x4_t __s1 = __p1; \ + float32x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + float32x4x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 41); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4x4_t __s1 = __p1; \ + int32x4x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 34); \ + __ret; \ +}) +#else +#define vld4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4x4_t __s1 = __p1; \ + int32x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + int32x4x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 34); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8x4_t __s1 = __p1; \ + int16x8x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 33); \ + __ret; \ +}) +#else +#define vld4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8x4_t __s1 = __p1; \ + int16x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 33); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + 
__ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x8x4_t __s1 = __p1; \ + uint8x8x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 16); \ + __ret; \ +}) +#else +#define vld4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x8x4_t __s1 = __p1; \ + uint8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 16); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x2x4_t __s1 = __p1; \ + uint32x2x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 18); \ + __ret; \ +}) +#else +#define vld4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x2x4_t __s1 = __p1; \ + uint32x2x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ + uint32x2x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 18); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4x4_t __s1 = __p1; \ + uint16x4x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 17); \ + __ret; \ +}) +#else +#define vld4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4x4_t __s1 = __p1; \ + uint16x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = 
__builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + uint16x4x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 17); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x8x4_t __s1 = __p1; \ + int8x8x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 0); \ + __ret; \ +}) +#else +#define vld4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x8x4_t __s1 = __p1; \ + int8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 0); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x2x4_t __s1 = __p1; \ + float32x2x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 9); \ + __ret; \ +}) +#else +#define vld4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x2x4_t __s1 = __p1; \ + float32x2x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ + float32x2x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 9); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2x4_t __s1 = __p1; \ + int32x2x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 2); \ + __ret; \ +}) +#else +#define vld4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2x4_t __s1 = __p1; \ + int32x2x4_t __rev1; \ + 
__rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ + int32x2x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 2); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4x4_t __s1 = __p1; \ + int16x4x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 1); \ + __ret; \ +}) +#else +#define vld4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4x4_t __s1 = __p1; \ + int16x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + int16x4x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 1); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + return __ret; +} +#else +__ai uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + return __ret; +} +#else +__ai uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { + uint16x8_t __ret; + __ret = (uint16x8_t) 
__builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + return __ret; +} +#else +__ai uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + return __ret; +} +#else +__ai int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + return __ret; +} +#else +__ai float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + return __ret; +} +#else +__ai int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + return __ret; +} +#else +__ai int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + return __ret; +} +#else +__ai uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __ret; @@ -18208,7375 +19564,8355 @@ __ai uint16x8_t vqrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { return __ret; } #else -__ai uint16x8_t vqrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { +__ai uint16x8_t vqrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + return __ret; +} +#else +__ai int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + return __ret; +} +#else +__ai int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + return __ret; +} +#else +__ai int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + return __ret; +} +#else +__ai int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint8x8_t vqrshl_u8(uint8x8_t 
__p0, int8x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + return __ret; +} +#else +__ai uint8x8_t vqrshl_u8(uint8x8_t __p0, int8x8_t __p1) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + return __ret; +} +#else +__ai uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint64x1_t vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + return __ret; +} +#else +__ai uint64x1_t vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + return __ret; +} +#else +__ai uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + return __ret; +} +#else +__ai int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t __p1) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + return __ret; +} +#else +__ai int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ 
+__ai int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + return __ret; +} +#else +__ai int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + return __ret; +} +#else +__ai int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret; \ +}) +#else +#define vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret; \ +}) +#else +#define vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 18); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret; \ +}) +#else +#define vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 16); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) 
__builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret; \ +}) +#else +#define vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret; \ +}) +#else +#define vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret; \ +}) +#else +#define vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 17); \ + __ret; \ +}) +#else +#define vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 17); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 18); \ + __ret; \ +}) +#else +#define vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 18); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + 
__ret; \ +}) +#define __noswap_vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 18); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 16); \ + __ret; \ +}) +#else +#define vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 16); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 16); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + return __ret; +} +#else +__ai uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + return __ret; +} +#else +__ai uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + return __ret; +} +#else +__ai uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x8_t vqshlq_u16(uint16x8_t __p0, int16x8_t __p1) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + return __ret; +} +#else +__ai uint16x8_t vqshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __ret; 
- __ret = (uint16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); + __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else -__ai int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); + __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { +__ai int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else -__ai int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { +__ai int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); + __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else -__ai int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); + __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { +__ai int16x8_t vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = (int16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else -__ai int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { +__ai int16x8_t 
vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); + __ret = (int16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vqrshl_u8(uint8x8_t __p0, int8x8_t __p1) { +__ai uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = (uint8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else -__ai uint8x8_t vqrshl_u8(uint8x8_t __p0, int8x8_t __p1) { +__ai uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); + __ret = (uint8x8_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { +__ai uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else -__ai uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { +__ai uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); + __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { +__ai uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else -__ai uint64x1_t vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { +__ai uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { +__ai uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else -__ai uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { +__ai uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, 
__p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); + __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t __p1) { +__ai int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else -__ai int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t __p1) { +__ai int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); + __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { +__ai int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else -__ai int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { +__ai int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); + __ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { +__ai int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + return __ret; +} +#else +__ai int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + return __ret; +} +#else +__ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlq_n_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret; \ +}) +#else +#define 
vqshlq_n_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlq_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret; \ +}) +#else +#define vqshlq_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 50); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlq_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret; \ +}) +#else +#define vqshlq_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 51); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlq_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret; \ +}) +#else +#define vqshlq_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlq_n_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __ret; \ + __ret = (int8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 32); \ + __ret; \ +}) +#else +#define vqshlq_n_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret; \ + __ret = (int8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlq_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 34); \ + __ret; \ +}) +#else +#define vqshlq_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 34); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlq_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __ret; \ + 
__ret = (int64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 35); \ + __ret; \ +}) +#else +#define vqshlq_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 35); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlq_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 33); \ + __ret; \ +}) +#else +#define vqshlq_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshl_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 16); \ + __ret; \ +}) +#else +#define vqshl_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 16); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshl_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 18); \ + __ret; \ +}) +#else +#define vqshl_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 18); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshl_n_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 19); \ + __ret; \ +}) +#else +#define vqshl_n_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 19); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshl_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 17); \ + __ret; \ +}) +#else +#define vqshl_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshl_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 0); \ + __ret; \ +}) +#else +#define vqshl_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, 
__s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshl_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 2); \ + __ret; \ +}) +#else +#define vqshl_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshl_n_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 3); \ + __ret; \ +}) +#else +#define vqshl_n_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 3); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshl_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 1); \ + __ret; \ +}) +#else +#define vqshl_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshluq_n_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret; \ +}) +#else +#define vqshluq_n_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshluq_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret; \ +}) +#else +#define vqshluq_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 50); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshluq_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret; \ +}) +#else +#define vqshluq_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 51); \ + __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshluq_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret; \ +}) +#else +#define vqshluq_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlu_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 16); \ + __ret; \ +}) +#else +#define vqshlu_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 16); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlu_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 18); \ + __ret; \ +}) +#else +#define vqshlu_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 18); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqshlu_n_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 19); \ + __ret; \ +}) #else -__ai int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} +#define vqshlu_n_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 19); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); - return __ret; -} +#define vqshlu_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 17); \ + __ret; \ +}) #else -__ai int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vqshlu_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 17); \ + __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else -#define vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 17); \ + __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s0 = __p0; \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #else -#define vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 18); \ + __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s0 = __p0; \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else -#define vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 16); \ + __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = (uint8x8_t) 
__builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #else -#define vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 1); \ + __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #else -#define vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 2); \ + __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #else -#define vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ +#define vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 0); \ + __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -#define vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ +#define vqshrun_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else -#define vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ +#define vqshrun_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 17); \ + __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrun_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ +#define vqshrun_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #else -#define vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ +#define vqshrun_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 18); \ + __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrun_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ +#define vqshrun_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else -#define vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ +#define vqshrun_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 16); \ + __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ +#define __noswap_vqshrun_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -__ai uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { +__ai uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else -__ai uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { +__ai uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); + __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { +__ai uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else -__ai uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { +__ai uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); + __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { +__ai uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else -__ai uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { +__ai uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vqshlq_u16(uint16x8_t __p0, int16x8_t __p1) { +__ai uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else -__ai uint16x8_t vqshlq_u16(uint16x8_t 
__p0, int16x8_t __p1) { +__ai uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); + __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else -__ai int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); + __ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { +__ai int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else -__ai int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { +__ai int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); + __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } +__ai int32x4_t __noswap_vqsubq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = (int64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else -__ai int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); + __ret = (int64x2_t) 
__builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { +__ai int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else -__ai int16x8_t vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { +__ai int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); + __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +__ai int16x8_t __noswap_vqsubq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + return __ret; +} +#else +__ai uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + return __ret; +} +#else +__ai uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + return __ret; +} +#else +__ai uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + return __ret; +} +#else +__ai uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); + __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + return __ret; +} +#else +__ai int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) { + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + return __ret; +} +#else +__ai int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai int32x2_t __noswap_vqsub_s32(int32x2_t __p0, int32x2_t __p1) { + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + return __ret; +} +#else +__ai int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + return __ret; +} +#else +__ai int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai int16x4_t __noswap_vqsub_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + return __ret; +} +#else +__ai uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai uint16x4_t __noswap_vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + return 
__ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18);
+ return __ret;
+}
+#else
+__ai uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+__ai uint32x2_t __noswap_vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
+ uint8x8_t __ret;
+ __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16);
+ return __ret;
+}
+#else
+__ai uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
+ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x8_t __ret;
+ __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+__ai uint8x8_t __noswap_vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
+ uint8x8_t __ret;
+ __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) {
+ int16x4_t __ret;
+ __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1);
+ return __ret;
+}
+#else
+__ai int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) {
+ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ int16x4_t __ret;
+ __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+__ai int16x4_t __noswap_vraddhn_s32(int32x4_t __p0, int32x4_t __p1) {
+ int16x4_t __ret;
+ __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) {
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2);
+ return __ret;
+}
+#else
+__ai int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) {
+ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+__ai int32x2_t __noswap_vraddhn_s64(int64x2_t __p0, int64x2_t __p1) {
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) {
+ int8x8_t __ret;
+ __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0);
+ return __ret;
+}
+#else
+__ai int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) {
+ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x8_t __ret;
+ __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0);
 __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
+__ai int8x8_t __noswap_vraddhn_s16(int16x8_t __p0, int16x8_t __p1) {
+ int8x8_t __ret;
+ __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vrecpeq_u32(uint32x4_t __p0) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vrecpeq_u32(uint32x4_t __p0) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) {
- uint8x8_t __ret;
- __ret = (uint8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
+__ai float32x4_t vrecpeq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 41);
 return __ret;
 }
 #else
-__ai uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) {
- uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
- int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
- uint8x8_t __ret;
- __ret = (uint8x8_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float32x4_t vrecpeq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) {
+__ai uint32x2_t vrecpe_u32(uint32x2_t __p0) {
 uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
+ __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 18);
 return __ret;
 }
 #else
-__ai uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) {
+__ai uint32x2_t vrecpe_u32(uint32x2_t __p0) {
 uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
 uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
+ __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 18);
 __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) {
- uint64x1_t __ret;
- __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
+__ai float32x2_t vrecpe_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 9);
 return __ret;
 }
 #else
-__ai uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) {
- uint64x1_t __ret;
- __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
+__ai float32x2_t vrecpe_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) {
- uint16x4_t __ret;
- __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
+__ai float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
 return __ret;
 }
 #else
-__ai uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) {
- uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- uint16x4_t __ret;
- __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
+__ai float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
 __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) {
- int8x8_t __ret;
- __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0);
+__ai float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
 return __ret;
 }
 #else
-__ai int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) {
- int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
- int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
- int8x8_t __ret;
- __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0);
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) {
- int32x2_t __ret;
- __ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
+__ai poly8x8_t vrev16_p8(poly8x8_t __p0) {
+ poly8x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6);
 return __ret;
 }
 #else
-__ai int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) {
- int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
- int32x2_t __ret;
- __ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2);
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai poly8x8_t vrev16_p8(poly8x8_t __p0) {
+ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) {
- int64x1_t __ret;
- __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3);
+__ai poly8x16_t vrev16q_p8(poly8x16_t __p0) {
+ poly8x16_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
 return __ret;
 }
 #else
-__ai int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) {
- int64x1_t __ret;
- __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3);
+__ai poly8x16_t vrev16q_p8(poly8x16_t __p0) {
+ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x16_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) {
- int16x4_t __ret;
- __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
+__ai uint8x16_t vrev16q_u8(uint8x16_t __p0) {
+ uint8x16_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
 return __ret;
 }
 #else
-__ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) {
- int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- int16x4_t __ret;
- __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai uint8x16_t vrev16q_u8(uint8x16_t __p0) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshlq_n_u8(__p0, __p1) __extension__ ({ \
- uint8x16_t __s0 = __p0; \
- uint8x16_t __ret; \
- __ret = (uint8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 48); \
- __ret; \
-})
-#else
-#define vqshlq_n_u8(__p0, __p1) __extension__ ({ \
- uint8x16_t __s0 = __p0; \
- uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- uint8x16_t __ret; \
- __ret = (uint8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 48); \
- __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshlq_n_u32(__p0, __p1) __extension__ ({ \
- uint32x4_t __s0 = __p0; \
- uint32x4_t __ret; \
- __ret = (uint32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 50); \
- __ret; \
-})
-#else
-#define vqshlq_n_u32(__p0, __p1) __extension__ ({ \
- uint32x4_t __s0 = __p0; \
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- uint32x4_t __ret; \
- __ret = (uint32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 50); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshlq_n_u64(__p0, __p1) __extension__ ({ \
- uint64x2_t __s0 = __p0; \
- uint64x2_t __ret; \
- __ret = (uint64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 51); \
- __ret; \
-})
-#else
-#define vqshlq_n_u64(__p0, __p1) __extension__ ({ \
- uint64x2_t __s0 = __p0; \
- uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- uint64x2_t __ret; \
- __ret = (uint64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 51); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshlq_n_u16(__p0, __p1) __extension__ ({ \
- uint16x8_t __s0 = __p0; \
- uint16x8_t __ret; \
- __ret = (uint16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 49); \
- __ret; \
-})
-#else
-#define vqshlq_n_u16(__p0, __p1) __extension__ ({ \
- uint16x8_t __s0 = __p0; \
- uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- uint16x8_t __ret; \
- __ret = (uint16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 49); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshlq_n_s8(__p0, __p1) __extension__ ({ \
- int8x16_t __s0 = __p0; \
- int8x16_t __ret; \
- __ret = (int8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 32); \
- __ret; \
-})
-#else
-#define vqshlq_n_s8(__p0, __p1) __extension__ ({ \
- int8x16_t __s0 = __p0; \
- int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- int8x16_t __ret; \
- __ret = (int8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 32); \
- __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshlq_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- int32x4_t __ret; \
- __ret = (int32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 34); \
- __ret; \
-})
-#else
-#define vqshlq_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- int32x4_t __ret; \
- __ret = (int32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 34); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshlq_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- int64x2_t __ret; \
- __ret = (int64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 35); \
- __ret; \
-})
-#else
-#define vqshlq_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- int64x2_t __ret; \
- __ret = (int64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 35); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshlq_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- int16x8_t __ret; \
- __ret = (int16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 33); \
- __ret; \
-})
-#else
-#define vqshlq_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- int16x8_t __ret; \
- __ret = (int16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 33); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshl_n_u8(__p0, __p1) __extension__ ({ \
- uint8x8_t __s0 = __p0; \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 16); \
- __ret; \
-})
-#else
-#define vqshl_n_u8(__p0, __p1) __extension__ ({ \
- uint8x8_t __s0 = __p0; \
- uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 16); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshl_n_u32(__p0, __p1) __extension__ ({ \
- uint32x2_t __s0 = __p0; \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 18); \
- __ret; \
-})
-#else
-#define vqshl_n_u32(__p0, __p1) __extension__ ({ \
- uint32x2_t __s0 = __p0; \
- uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 18); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshl_n_u64(__p0, __p1) __extension__ ({ \
- uint64x1_t __s0 = __p0; \
- uint64x1_t __ret; \
- __ret = (uint64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 19); \
- __ret; \
-})
-#else
-#define vqshl_n_u64(__p0, __p1) __extension__ ({ \
- uint64x1_t __s0 = __p0; \
- uint64x1_t __ret; \
- __ret = (uint64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 19); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshl_n_u16(__p0, __p1) __extension__ ({ \
- uint16x4_t __s0 = __p0; \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 17); \
- __ret; \
-})
-#else
-#define vqshl_n_u16(__p0, __p1) __extension__ ({ \
- uint16x4_t __s0 = __p0; \
- uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 17); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshl_n_s8(__p0, __p1) __extension__ ({ \
- int8x8_t __s0 = __p0; \
- int8x8_t __ret; \
- __ret = (int8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 0); \
- __ret; \
-})
-#else
-#define vqshl_n_s8(__p0, __p1) __extension__ ({ \
- int8x8_t __s0 = __p0; \
- int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- int8x8_t __ret; \
- __ret = (int8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 0); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshl_n_s32(__p0, __p1) __extension__ ({ \
- int32x2_t __s0 = __p0; \
- int32x2_t __ret; \
- __ret = (int32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 2); \
- __ret; \
-})
-#else
-#define vqshl_n_s32(__p0, __p1) __extension__ ({ \
- int32x2_t __s0 = __p0; \
- int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- int32x2_t __ret; \
- __ret = (int32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 2); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqshl_n_s64(__p0, __p1) __extension__ ({ \
- int64x1_t __s0 = __p0; \
- int64x1_t __ret; \
- __ret = (int64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 3); \
- __ret; \
-})
+__ai int8x16_t vrev16q_s8(int8x16_t __p0) {
+ int8x16_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+ return __ret;
+}
 #else
-#define vqshl_n_s64(__p0, __p1) __extension__ ({ \
- int64x1_t __s0 = __p0; \
- int64x1_t __ret; \
- __ret = (int64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 3); \
- __ret; \
-})
+__ai int8x16_t vrev16q_s8(int8x16_t __p0) {
+ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshl_n_s16(__p0, __p1) __extension__ ({ \
- int16x4_t __s0 = __p0; \
- int16x4_t __ret; \
- __ret = (int16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 1); \
- __ret; \
-})
+__ai uint8x8_t vrev16_u8(uint8x8_t __p0) {
+ uint8x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6);
+ return __ret;
+}
 #else
-#define vqshl_n_s16(__p0, __p1) __extension__ ({ \
- int16x4_t __s0 = __p0; \
- int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- int16x4_t __ret; \
- __ret = (int16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 1); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
+__ai uint8x8_t vrev16_u8(uint8x8_t __p0) {
+ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshluq_n_s8(__p0, __p1) __extension__ ({ \
- int8x16_t __s0 = __p0; \
- uint8x16_t __ret; \
- __ret = (uint8x16_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 48); \
- __ret; \
-})
+__ai int8x8_t vrev16_s8(int8x8_t __p0) {
+ int8x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6);
+ return __ret;
+}
 #else
-#define vqshluq_n_s8(__p0, __p1) __extension__ ({ \
- int8x16_t __s0 = __p0; \
- int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- uint8x16_t __ret; \
- __ret = (uint8x16_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 48); \
- __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
+__ai int8x8_t vrev16_s8(int8x8_t __p0) {
+ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshluq_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- uint32x4_t __ret; \
- __ret = (uint32x4_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 50); \
- __ret; \
-})
+__ai poly8x8_t vrev32_p8(poly8x8_t __p0) {
+ poly8x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4);
+ return __ret;
+}
 #else
-#define vqshluq_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- uint32x4_t __ret; \
- __ret = (uint32x4_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 50); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
+__ai poly8x8_t vrev32_p8(poly8x8_t __p0) {
+ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshluq_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- uint64x2_t __ret; \
- __ret = (uint64x2_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 51); \
- __ret; \
-})
+__ai poly16x4_t vrev32_p16(poly16x4_t __p0) {
+ poly16x4_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2);
+ return __ret;
+}
 #else
-#define vqshluq_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- uint64x2_t __ret; \
- __ret = (uint64x2_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 51); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
+__ai poly16x4_t vrev32_p16(poly16x4_t __p0) {
+ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ poly16x4_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshluq_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- uint16x8_t __ret; \
- __ret = (uint16x8_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 49); \
- __ret; \
-})
+__ai poly8x16_t vrev32q_p8(poly8x16_t __p0) {
+ poly8x16_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ return __ret;
+}
 #else
-#define vqshluq_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- uint16x8_t __ret; \
- __ret = (uint16x8_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 49); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
+__ai poly8x16_t vrev32q_p8(poly8x16_t __p0) {
+ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x16_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshlu_n_s8(__p0, __p1) __extension__ ({ \
- int8x8_t __s0 = __p0; \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 16); \
- __ret; \
-})
+__ai poly16x8_t vrev32q_p16(poly16x8_t __p0) {
+ poly16x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6);
+ return __ret;
+}
 #else
-#define vqshlu_n_s8(__p0, __p1) __extension__ ({ \
- int8x8_t __s0 = __p0; \
- int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 16); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
+__ai poly16x8_t vrev32q_p16(poly16x8_t __p0) {
+ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly16x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshlu_n_s32(__p0, __p1) __extension__ ({ \
- int32x2_t __s0 = __p0; \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 18); \
- __ret; \
-})
+__ai uint8x16_t vrev32q_u8(uint8x16_t __p0) {
+ uint8x16_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ return __ret;
+}
 #else
-#define vqshlu_n_s32(__p0, __p1) __extension__ ({ \
- int32x2_t __s0 = __p0; \
- int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 18); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
+__ai uint8x16_t vrev32q_u8(uint8x16_t __p0) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshlu_n_s64(__p0, __p1) __extension__ ({ \
- int64x1_t __s0 = __p0; \
- uint64x1_t __ret; \
- __ret = (uint64x1_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 19); \
- __ret; \
-})
+__ai uint16x8_t vrev32q_u16(uint16x8_t __p0) {
+ uint16x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6);
+ return __ret;
+}
 #else
-#define vqshlu_n_s64(__p0, __p1) __extension__ ({ \
- int64x1_t __s0 = __p0; \
- uint64x1_t __ret; \
- __ret = (uint64x1_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 19); \
- __ret; \
-})
+__ai uint16x8_t vrev32q_u16(uint16x8_t __p0) {
+ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint16x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshlu_n_s16(__p0, __p1) __extension__ ({ \
- int16x4_t __s0 = __p0; \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 17); \
- __ret; \
-})
+__ai int8x16_t vrev32q_s8(int8x16_t __p0) {
+ int8x16_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ return __ret;
+}
 #else
-#define vqshlu_n_s16(__p0, __p1) __extension__ ({ \
- int16x4_t __s0 = __p0; \
- int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 17); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
+__ai int8x16_t vrev32q_s8(int8x16_t __p0) {
+ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrn_n_u32(__p0, __p1) __extension__ ({ \
- uint32x4_t __s0 = __p0; \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 17); \
- __ret; \
-})
+__ai int16x8_t vrev32q_s16(int16x8_t __p0) {
+ int16x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6);
+ return __ret;
+}
 #else
-#define vqshrn_n_u32(__p0, __p1) __extension__ ({ \
- uint32x4_t __s0 = __p0; \
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 17); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrn_n_u32(__p0, __p1) __extension__ ({ \
- uint32x4_t __s0 = __p0; \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 17); \
- __ret; \
-})
+__ai int16x8_t vrev32q_s16(int16x8_t __p0) {
+ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrn_n_u64(__p0, __p1) __extension__ ({ \
- uint64x2_t __s0 = __p0; \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 18); \
- __ret; \
-})
+__ai uint8x8_t vrev32_u8(uint8x8_t __p0) {
+ uint8x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4);
+ return __ret;
+}
 #else
-#define vqshrn_n_u64(__p0, __p1) __extension__ ({ \
- uint64x2_t __s0 = __p0; \
- uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 18); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrn_n_u64(__p0, __p1) __extension__ ({ \
- uint64x2_t __s0 = __p0; \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 18); \
- __ret; \
-})
+__ai uint8x8_t vrev32_u8(uint8x8_t __p0) {
+ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrn_n_u16(__p0, __p1) __extension__ ({ \
- uint16x8_t __s0 = __p0; \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 16); \
- __ret; \
-})
+__ai uint16x4_t vrev32_u16(uint16x4_t __p0) {
+ uint16x4_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2);
+ return __ret;
+}
 #else
-#define vqshrn_n_u16(__p0, __p1) __extension__ ({ \
- uint16x8_t __s0 = __p0; \
- uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 16); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrn_n_u16(__p0, __p1) __extension__ ({ \
- uint16x8_t __s0 = __p0; \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 16); \
- __ret; \
-})
+__ai uint16x4_t vrev32_u16(uint16x4_t __p0) {
+ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint16x4_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrn_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- int16x4_t __ret; \
- __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 1); \
- __ret; \
-})
+__ai int8x8_t vrev32_s8(int8x8_t __p0) {
+ int8x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4);
+ return __ret;
+}
 #else
-#define vqshrn_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- int16x4_t __ret; \
- __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 1); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrn_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- int16x4_t __ret; \
- __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 1); \
- __ret; \
-})
+__ai int8x8_t vrev32_s8(int8x8_t __p0) {
+ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrn_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- int32x2_t __ret; \
- __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 2); \
- __ret; \
-})
+__ai int16x4_t vrev32_s16(int16x4_t __p0) {
+ int16x4_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2);
+ return __ret;
+}
 #else
-#define vqshrn_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- int32x2_t __ret; \
- __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 2); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrn_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- int32x2_t __ret; \
- __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 2); \
- __ret; \
-})
+__ai int16x4_t vrev32_s16(int16x4_t __p0) {
+ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int16x4_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrn_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- int8x8_t __ret; \
- __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 0); \
- __ret; \
-})
+__ai poly8x8_t vrev64_p8(poly8x8_t __p0) {
+ poly8x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #else
-#define vqshrn_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- int8x8_t __ret; \
- __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 0); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrn_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- int8x8_t __ret; \
- __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 0); \
- __ret; \
-})
+__ai poly8x8_t vrev64_p8(poly8x8_t __p0) {
+ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrun_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 17); \
- __ret; \
-})
+__ai poly16x4_t vrev64_p16(poly16x4_t __p0) {
+ poly16x4_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ return __ret;
+}
 #else
-#define vqshrun_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 17); \
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrun_n_s32(__p0, __p1) __extension__ ({ \
- int32x4_t __s0 = __p0; \
- uint16x4_t __ret; \
- __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 17); \
- __ret; \
-})
+__ai poly16x4_t vrev64_p16(poly16x4_t __p0) {
+ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ poly16x4_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrun_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 18); \
- __ret; \
-})
+__ai poly8x16_t vrev64q_p8(poly8x16_t __p0) {
+ poly8x16_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ return __ret;
+}
 #else
-#define vqshrun_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 18); \
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrun_n_s64(__p0, __p1) __extension__ ({ \
- int64x2_t __s0 = __p0; \
- uint32x2_t __ret; \
- __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 18); \
- __ret; \
-})
+__ai poly8x16_t vrev64q_p8(poly8x16_t __p0) {
+ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x16_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqshrun_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 16); \
- __ret; \
-})
+__ai poly16x8_t vrev64q_p16(poly16x8_t __p0) {
+ poly16x8_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4);
+ return __ret;
+}
 #else
-#define vqshrun_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 16); \
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
- __ret; \
-})
-#define __noswap_vqshrun_n_s16(__p0, __p1) __extension__ ({ \
- int16x8_t __s0 = __p0; \
- uint8x8_t __ret; \
- __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 16); \
- __ret; \
-})
+__ai poly16x8_t vrev64q_p16(poly16x8_t __p0) {
+ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly16x8_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+__ai uint8x16_t vrev64q_u8(uint8x16_t __p0) {
 uint8x16_t __ret;
- __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
+ __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
 return __ret;
 }
 #else
-__ai uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+__ai uint8x16_t vrev64q_u8(uint8x16_t __p0) {
 uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
- uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 uint8x16_t __ret;
- __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
 __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+__ai uint32x4_t vrev64q_u32(uint32x4_t __p0) {
 uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2);
 return __ret;
 }
 #else
-__ai uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+__ai uint32x4_t vrev64q_u32(uint32x4_t __p0) {
 uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
 uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2);
 __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) {
- uint64x2_t __ret;
- __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 51);
- return __ret;
-}
-#else
-__ai uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) {
- uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
- uint64x2_t __ret;
- __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51);
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+__ai uint16x8_t vrev64q_u16(uint16x8_t __p0) {
 uint16x8_t __ret;
- __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4);
 return __ret;
 }
 #else
-__ai uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+__ai uint16x8_t vrev64q_u16(uint16x8_t __p0) {
 uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
- uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
 uint16x8_t __ret;
- __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4);
 __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) {
+__ai int8x16_t vrev64q_s8(int8x16_t __p0) {
 int8x16_t __ret;
- __ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
+ __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
 return __ret;
 }
 #else
-__ai int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) {
+__ai int8x16_t vrev64q_s8(int8x16_t __p0) {
 int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
- int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 int8x16_t __ret;
- __ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
 __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) {
- int32x4_t __ret;
- __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
+__ai float32x4_t vrev64q_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2);
 return __ret;
 }
 #else
-__ai int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) {
- int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- int32x4_t __ret;
- __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
+__ai float32x4_t vrev64q_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2);
 __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
-__ai int32x4_t __noswap_vqsubq_s32(int32x4_t __p0, int32x4_t __p1) {
- int32x4_t __ret;
- __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
- return __ret;
-}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) {
- int64x2_t __ret;
- __ret = (int64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 35);
+__ai int32x4_t vrev64q_s32(int32x4_t __p0) {
+ int32x4_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2);
 return __ret;
 }
 #else
-__ai int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) {
- int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
- int64x2_t __ret;
- __ret = (int64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35);
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int32x4_t vrev64q_s32(int32x4_t __p0) {
+ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) {
- int16x8_t __ret;
- __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
- return __ret;
-}
-#else
-__ai int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) {
- int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
- int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int16x8_t vrev64q_s16(int16x8_t __p0) {
 int16x8_t __ret;
- __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4);
 return __ret;
 }
-__ai int16x8_t __noswap_vqsubq_s16(int16x8_t __p0, int16x8_t __p1) {
+#else
+__ai int16x8_t vrev64q_s16(int16x8_t __p0) {
+ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
 int16x8_t __ret;
- __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) {
+__ai uint8x8_t vrev64_u8(uint8x8_t __p0) {
 uint8x8_t __ret;
- __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
+ __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #else
-__ai uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) {
+__ai uint8x8_t vrev64_u8(uint8x8_t __p0) {
 uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
- uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
 uint8x8_t __ret;
- __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0);
 __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) {
+__ai uint32x2_t vrev64_u32(uint32x2_t __p0) {
 uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0);
 return __ret;
 }
 #else
-__ai uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) {
+__ai uint32x2_t vrev64_u32(uint32x2_t __p0) {
 uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
 uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0);
 __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) {
- uint64x1_t __ret;
- __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
- return __ret;
-}
-#else
-__ai uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) {
- uint64x1_t __ret;
- __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) {
+__ai uint16x4_t vrev64_u16(uint16x4_t __p0) {
 uint16x4_t __ret;
- __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
 return __ret;
 }
 #else
-__ai uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) {
+__ai uint16x4_t vrev64_u16(uint16x4_t __p0) {
 uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
 uint16x4_t __ret;
- __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0);
 __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) {
+__ai int8x8_t vrev64_s8(int8x8_t __p0) {
 int8x8_t __ret;
- __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0);
+ __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #else
-__ai int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) {
+__ai int8x8_t vrev64_s8(int8x8_t __p0) {
 int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
- int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
 int8x8_t __ret;
- __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0);
 __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) {
- int32x2_t __ret;
- __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
+__ai float32x2_t vrev64_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0);
 return __ret;
 }
 #else
-__ai int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) {
- int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
- int32x2_t __ret;
- __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2);
+__ai float32x2_t vrev64_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0);
 __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
-__ai int32x2_t __noswap_vqsub_s32(int32x2_t __p0, int32x2_t __p1) {
- int32x2_t __ret;
- __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
- return __ret;
-}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) {
- int64x1_t __ret;
- __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3);
+__ai int32x2_t vrev64_s32(int32x2_t __p0) {
+ int32x2_t __ret;
+ __ret = __builtin_shufflevector(__p0, __p0, 1, 0);
 return __ret;
 }
 #else
-__ai int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) {
- int64x1_t __ret;
- __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3);
+__ai int32x2_t vrev64_s32(int32x2_t __p0) {
+ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int32x2_t __ret;
+ __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) {
+__ai int16x4_t vrev64_s16(int16x4_t __p0) {
 int16x4_t __ret;
- __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
+ __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
 return __ret;
 }
 #else
-__ai int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) {
+__ai int16x4_t vrev64_s16(int16x4_t __p0) {
 int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
 int16x4_t __ret;
- __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1);
+ __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0);
 __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
-__ai int16x4_t __noswap_vqsub_s16(int16x4_t __p0, int16x4_t __p1) {
- int16x4_t __ret;
- __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
- return __ret;
-}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) {
- uint16x4_t __ret;
- __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17);
+__ai uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
 return __ret;
 }
 #else
-__ai uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- uint16x4_t __ret;
- __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-__ai uint16x4_t __noswap_vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) {
- uint16x4_t __ret;
- __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17);
+__ai uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
- uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18);
+__ai uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
 return __ret;
 }
 #else
-__ai uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
- uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
- uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18);
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
- return __ret;
-}
-__ai uint32x2_t __noswap_vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
- uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18);
+__ai uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
- uint8x8_t __ret;
- __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16);
+__ai uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+ uint16x8_t __ret;
+ __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
 return __ret;
 }
 #else
-__ai uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
+__ai uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
 uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
 uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
- uint8x8_t __ret;
- __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16);
+ uint16x8_t __ret;
+ __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
 __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
-__ai uint8x8_t __noswap_vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
- uint8x8_t __ret;
- __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16);
- return __ret;
-}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) {
- int16x4_t __ret;
- __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1);
+__ai int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) {
+ int8x16_t __ret;
+ __ret = (int8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
 return __ret;
 }
 #else
-__ai int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) {
- int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- int16x4_t __ret;
- __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-__ai int16x4_t __noswap_vraddhn_s32(int32x4_t __p0, int32x4_t __p1) {
- int16x4_t __ret;
- __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1);
+__ai int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) {
+ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __ret;
+ __ret = (int8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) {
- int32x2_t __ret;
- __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2);
+__ai int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) {
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
 return __ret;
 }
 #else
-__ai int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) {
- int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
- int32x2_t __ret;
- __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2);
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
- return __ret;
-}
-__ai int32x2_t __noswap_vraddhn_s64(int64x2_t __p0, int64x2_t __p1) {
- int32x2_t __ret;
- __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2);
+__ai int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) {
+ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) {
- int8x8_t __ret;
- __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0);
+__ai int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) {
+ int16x8_t __ret;
+ __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
 return __ret;
 }
 #else
-__ai int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) {
+__ai int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) {
 int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
 int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
- int8x8_t __ret;
- __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0);
+ int16x8_t __ret;
+ __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
 __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
-__ai int8x8_t __noswap_vraddhn_s16(int16x8_t __p0, int16x8_t __p1) {
- int8x8_t __ret;
- __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vrecpeq_u32(uint32x4_t __p0) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 50);
- return __ret;
-}
-#else
-__ai uint32x4_t vrecpeq_u32(uint32x4_t __p0) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 50);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32x4_t vrecpeq_f32(float32x4_t __p0) {
- float32x4_t __ret;
- __ret = (float32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 41);
+__ai uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
+ uint8x8_t __ret;
+ __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
 return __ret;
 }
 #else
-__ai float32x4_t vrecpeq_f32(float32x4_t __p0) {
- float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- float32x4_t __ret;
- __ret = (float32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 41);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
+ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x8_t __ret;
+ __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vrecpe_u32(uint32x2_t __p0) {
+__ai uint32x2_t vrhadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
 uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 18);
+ __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
 return __ret;
 }
 #else
-__ai uint32x2_t vrecpe_u32(uint32x2_t __p0) {
+__ai uint32x2_t vrhadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
 uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
 uint32x2_t __ret;
- __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 18);
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float32x2_t vrecpe_f32(float32x2_t __p0) {
- float32x2_t __ret;
- __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 9);
- return __ret;
-}
-#else
-__ai float32x2_t vrecpe_f32(float32x2_t __p0) {
- float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- float32x2_t __ret;
- __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 9);
+ __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
 __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) {
- float32x4_t __ret;
- __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
+__ai uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
+ uint16x4_t __ret;
+ __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
 return __ret;
 }
 #else
-__ai float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) {
- float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- float32x4_t __ret;
- __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
+__ai uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
+ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint16x4_t __ret;
+ __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
 __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) {
- float32x2_t __ret;
- __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
+__ai int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) {
+ int8x8_t __ret;
+ __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0);
 return __ret;
 }
 #else
-__ai float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) {
- float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
- float32x2_t __ret;
- __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) {
+ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x8_t __ret;
+ __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vrev16_p8(poly8x8_t __p0) {
- poly8x8_t __ret;
- __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6);
+__ai int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) {
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
 return __ret;
 }
 #else
-__ai poly8x8_t vrev16_p8(poly8x8_t __p0) {
- poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
- poly8x8_t __ret;
- __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6);
- __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) {
+ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vrev16q_p8(poly8x16_t __p0) {
- poly8x16_t __ret;
- __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+__ai int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) {
+ int16x4_t __ret;
+ __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
 return __ret;
 }
 #else
-__ai poly8x16_t vrev16q_p8(poly8x16_t __p0) {
- poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
- poly8x16_t __ret;
- __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
- __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) {
+ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ int16x4_t __ret;
+ __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vrev16q_u8(uint8x16_t __p0) {
+__ai uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) {
 uint8x16_t __ret;
- __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+ __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
 return __ret;
 }
 #else
-__ai uint8x16_t vrev16q_u8(uint8x16_t __p0) {
+__ai uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) {
 uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 uint8x16_t __ret;
- __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
- __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vrev16q_s8(int8x16_t __p0) {
- int8x16_t __ret;
- __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
- return __ret;
-}
-#else
-__ai int8x16_t vrev16q_s8(int8x16_t __p0) {
- int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
- int8x16_t __ret;
- __ret =
__builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vrev16_u8(uint8x8_t __p0) { - uint8x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); - return __ret; -} -#else -__ai uint8x8_t vrev16_u8(uint8x8_t __p0) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vrev16_s8(int8x8_t __p0) { - int8x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); - return __ret; -} -#else -__ai int8x8_t vrev16_s8(int8x8_t __p0) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly8x8_t vrev32_p8(poly8x8_t __p0) { - poly8x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); - return __ret; -} -#else -__ai poly8x8_t vrev32_p8(poly8x8_t __p0) { - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly16x4_t vrev32_p16(poly16x4_t __p0) { - poly16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); +__ai uint32x4_t vrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else -__ai poly16x4_t vrev32_p16(poly16x4_t __p0) { - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); +__ai uint32x4_t vrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly8x16_t vrev32q_p8(poly8x16_t __p0) { - poly8x16_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); +__ai uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else -__ai poly8x16_t vrev32q_p8(poly8x16_t __p0) { - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vrev32q_p16(poly16x8_t __p0) { - poly16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); +__ai uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else -__ai poly16x8_t vrev32q_p16(poly16x8_t __p0) { - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); +__ai uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vrev32q_u8(uint8x16_t __p0) { - uint8x16_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); +__ai int8x16_t vrshlq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else -__ai uint8x16_t vrev32q_u8(uint8x16_t __p0) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); +__ai int8x16_t vrshlq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vrev32q_u16(uint16x8_t __p0) { - uint16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); +__ai int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else -__ai uint16x8_t vrev32q_u16(uint16x8_t __p0) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + 
int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif -#ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vrev32q_s8(int8x16_t __p0) { - int8x16_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); +#ifdef __LITTLE_ENDIAN__ +__ai int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else -__ai int8x16_t vrev32q_s8(int8x16_t __p0) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vrev32q_s16(int16x8_t __p0) { +__ai int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); + __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else -__ai int16x8_t vrev32q_s16(int16x8_t __p0) { +__ai int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); + __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vrev32_u8(uint8x8_t __p0) { +__ai uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); + __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else -__ai uint8x8_t vrev32_u8(uint8x8_t __p0) { +__ai uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); + __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vrev32_u16(uint16x4_t __p0) { - uint16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); +__ai uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else -__ai uint16x4_t vrev32_u16(uint16x4_t __p0) { - uint16x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vrev32_s8(int8x8_t __p0) { - int8x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); +__ai uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else -__ai int8x8_t vrev32_s8(int8x8_t __p0) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vrev32_s16(int16x4_t __p0) { - int16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); +__ai uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else -__ai int16x4_t vrev32_s16(int16x4_t __p0) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); +__ai uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly8x8_t vrev64_p8(poly8x8_t __p0) { - poly8x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else -__ai poly8x8_t vrev64_p8(poly8x8_t __p0) { - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x4_t vrev64_p16(poly16x4_t __p0) { - poly16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); +__ai int32x2_t vrshl_s32(int32x2_t __p0, 
int32x2_t __p1) { + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else -__ai poly16x4_t vrev64_p16(poly16x4_t __p0) { - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly8x16_t vrev64q_p8(poly8x16_t __p0) { - poly8x16_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); +__ai int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #else -__ai poly8x16_t vrev64q_p8(poly8x16_t __p0) { - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vrev64q_p16(poly16x8_t __p0) { - poly16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); +__ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else -__ai poly16x8_t vrev64q_p16(poly16x8_t __p0) { - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vrev64q_u8(uint8x16_t __p0) { - uint8x16_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - return __ret; -} +#define vrshrq_n_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret; \ +}) #else -__ai uint8x16_t vrev64q_u8(uint8x16_t __p0) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __ret = __builtin_shufflevector(__ret, 
__ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vrshrq_n_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vrev64q_u32(uint32x4_t __p0) { - uint32x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); - return __ret; -} +#define vrshrq_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret; \ +}) #else -__ai uint32x4_t vrev64q_u32(uint32x4_t __p0) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vrshrq_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 50); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vrev64q_u16(uint16x8_t __p0) { - uint16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); - return __ret; -} +#define vrshrq_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret; \ +}) #else -__ai uint16x8_t vrev64q_u16(uint16x8_t __p0) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vrshrq_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 51); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrq_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret; \ +}) +#else +#define vrshrq_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrq_n_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __ret; \ + __ret = (int8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 32); \ + __ret; \ +}) +#else +#define vrshrq_n_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, 
__s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret; \ + __ret = (int8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrq_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 34); \ + __ret; \ +}) +#else +#define vrshrq_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 34); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrq_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 35); \ + __ret; \ +}) +#else +#define vrshrq_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 35); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrq_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 33); \ + __ret; \ +}) +#else +#define vrshrq_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshr_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 16); \ + __ret; \ +}) +#else +#define vrshr_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 16); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshr_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 18); \ + __ret; \ +}) +#else +#define vrshr_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 18); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshr_n_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 19); \ + __ret; \ +}) +#else +#define vrshr_n_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) 
__builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 19); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshr_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 17); \ + __ret; \ +}) +#else +#define vrshr_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshr_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 0); \ + __ret; \ +}) +#else +#define vrshr_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshr_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 2); \ + __ret; \ +}) +#else +#define vrshr_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshr_n_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 3); \ + __ret; \ +}) +#else +#define vrshr_n_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 3); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshr_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 1); \ + __ret; \ +}) +#else +#define vrshr_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret; \ +}) +#else +#define vrshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vrshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + 
__ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret; \ +}) +#else +#define vrshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 18); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vrshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret; \ +}) +#else +#define vrshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 16); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vrshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vrev64q_s8(int8x16_t __p0) { - int8x16_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - return __ret; -} +#define vrshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret; \ +}) #else -__ai int8x16_t vrev64q_s8(int8x16_t __p0) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vrshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vrshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrev64q_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); - return __ret; -} +#define vrshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret; \ +}) #else -__ai float32x4_t vrev64q_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 
2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vrshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vrshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vrev64q_s32(int32x4_t __p0) { - int32x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); - return __ret; -} +#define vrshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret; \ +}) #else -__ai int32x4_t vrev64q_s32(int32x4_t __p0) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vrshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vrshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vrev64q_s16(int16x8_t __p0) { - int16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); +__ai uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai int16x8_t vrev64q_s16(int16x8_t __p0) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vrev64_u8(uint8x8_t __p0) { - uint8x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float32x4_t vrsqrteq_f32(float32x4_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 41); return __ret; } #else -__ai uint8x8_t vrev64_u8(uint8x8_t __p0) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float32x4_t vrsqrteq_f32(float32x4_t __p0) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __ret; + __ret 
= (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vrev64_u32(uint32x2_t __p0) { +__ai uint32x2_t vrsqrte_u32(uint32x2_t __p0) { uint32x2_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0); + __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 18); return __ret; } #else -__ai uint32x2_t vrev64_u32(uint32x2_t __p0) { +__ai uint32x2_t vrsqrte_u32(uint32x2_t __p0) { uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); + __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vrev64_u16(uint16x4_t __p0) { - uint16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); +__ai float32x2_t vrsqrte_f32(float32x2_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 9); return __ret; } #else -__ai uint16x4_t vrev64_u16(uint16x4_t __p0) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float32x2_t vrsqrte_f32(float32x2_t __p0) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vrev64_s8(int8x8_t __p0) { - int8x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else -__ai int8x8_t vrev64_s8(int8x8_t __p0) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrev64_f32(float32x2_t __p0) { +__ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0); + __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else -__ai float32x2_t vrev64_f32(float32x2_t __p0) { +__ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); + __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vrev64_s32(int32x2_t __p0) { - int32x2_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1, 0); - return __ret; -} +#define vrsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret; \ +}) #else -__ai int32x2_t vrev64_s32(int32x2_t __p0) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vrsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vrev64_s16(int16x4_t __p0) { - int16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - return __ret; -} +#define vrsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ + __ret; \ +}) #else -__ai int16x4_t vrev64_s16(int16x4_t __p0) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vrsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); - return __ret; -} +#define vrsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __s1 = __p1; \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ + __ret; \ +}) #else -__ai uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; 
-} +#define vrsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __s1 = __p1; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); - return __ret; -} +#define vrsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret; \ +}) #else -__ai uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vrsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); - return __ret; -} +#define vrsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __s1 = __p1; \ + int8x16_t __ret; \ + __ret = (int8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret; \ +}) #else -__ai uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vrsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __s1 = __p1; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret; \ + __ret = (int8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __ret; - __ret = (int8x16_t) 
__builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); - return __ret; -} +#define vrsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret; \ +}) #else -__ai int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vrsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); - return __ret; -} +#define vrsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __s1 = __p1; \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ + __ret; \ +}) #else -__ai int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vrsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __s1 = __p1; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); - return __ret; -} +#define vrsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret; \ +}) #else -__ai int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 
0); - return __ret; -} +#define vrsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); - return __ret; -} +#define vrsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ + __ret; \ +}) #else -__ai uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vrsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vrhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); - return __ret; -} +#define vrsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ + __ret; \ +}) #else -__ai uint32x2_t vrhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vrsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); - return __ret; -} +#define vrsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + 
uint64x1_t __s1 = __p1; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret; \ +}) +#else +#define vrsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64x1_t __s1 = __p1; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __s1 = __p1; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ + __ret; \ +}) #else -__ai uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vrsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __s1 = __p1; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) { - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); - return __ret; -} +#define vrsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __s1 = __p1; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ + __ret; \ +}) #else -__ai int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vrsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __s1 = __p1; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) { - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); - return __ret; -} +#define vrsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret; \ +}) #else -__ai int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t 
__rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vrsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) { - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); - return __ret; -} +#define vrsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64x1_t __s1 = __p1; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret; \ +}) #else -__ai int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vrsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64x1_t __s1 = __p1; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); - return __ret; -} +#define vrsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret; \ +}) #else -__ai uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vrsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); +__ai uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { + uint16x4_t __ret; + __ret = 
(uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #else -__ai uint32x4_t vrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { +__ai uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } +__ai uint16x4_t __noswap_vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); +__ai uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } #else -__ai uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { +__ai uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } +__ai uint32x2_t __noswap_vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); +__ai uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #else -__ai uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { +__ai uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vrshlq_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); - return __ret; -} -#else -__ai int8x16_t vrshlq_s8(int8x16_t __p0, 
int8x16_t __p1) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint8x8_t __noswap_vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); +__ai int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #else -__ai int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { +__ai int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } +__ai int16x4_t __noswap_vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); +__ai int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #else -__ai int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); - return __ret; -} -#else -__ai int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int32x2_t __noswap_vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, 
(int8x16_t)__p1, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); +__ai int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #else -__ai uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); +__ai int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); - return __ret; -} -#else -__ai uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#else -__ai uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); - return __ret; -} -#else -__ai uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); - return __ret; -} +#define vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8_t __s0 = __p0; \ + poly8x8_t __s1 = __p1; \ + poly8x8_t __ret; \ + __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #else -__ai int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { - int8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8_t __s0 = __p0; \ + poly8x8_t __s1 = __p1; \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret; \ + __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8_t __s0 = __p0; \ + poly8x8_t __s1 = __p1; \ + poly8x8_t __ret; \ + __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); - return __ret; -} +#define vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16_t __s0 = __p0; \ + poly16x4_t __s1 = __p1; \ + poly16x4_t __ret; \ + __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #else -__ai int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16_t __s0 = __p0; \ + poly16x4_t __s1 = __p1; \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + poly16x4_t __ret; \ + __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16_t __s0 = __p0; \ + poly16x4_t __s1 = __p1; \ + poly16x4_t __ret; \ + __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} +#define vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8_t __s0 = __p0; \ + poly8x16_t __s1 = __p1; \ + poly8x16_t __ret; \ + __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #else -__ai int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} +#define vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8_t __s0 = __p0; \ + poly8x16_t __s1 = __p1; \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __ret; \ + __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define 
__noswap_vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8_t __s0 = __p0; \ + poly8x16_t __s1 = __p1; \ + poly8x16_t __ret; \ + __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); - return __ret; -} +#define vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16_t __s0 = __p0; \ + poly16x8_t __s1 = __p1; \ + poly16x8_t __ret; \ + __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #else -__ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16_t __s0 = __p0; \ + poly16x8_t __s1 = __p1; \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __ret; \ + __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16_t __s0 = __p0; \ + poly16x8_t __s1 = __p1; \ + poly16x8_t __ret; \ + __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrq_n_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ +#define vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else -#define vrshrq_n_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 48); \ + __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint8x16_t __ret; \ + __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrq_n_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ +#define vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, 
__p2); \ __ret; \ }) #else -#define vrshrq_n_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ +#define vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 50); \ + __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrq_n_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ +#define vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64x2_t __s1 = __p1; \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else -#define vrshrq_n_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ +#define vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64x2_t __s1 = __p1; \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 51); \ + __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) +#define __noswap_vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64x2_t __s1 = __p1; \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrq_n_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ +#define vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else -#define vrshrq_n_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 49); \ + __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vrshrq_n_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s0 = __p0; \ +#define vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8_t __s0 = __p0; \ + int8x16_t __s1 = __p1; \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 32); \ + __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else -#define vrshrq_n_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8_t __s0 = __p0; \ + int8x16_t __s1 = __p1; \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 32); \ + __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8_t __s0 = __p0; \ + int8x16_t __s1 = __p1; \ + int8x16_t __ret; \ + __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrq_n_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ +#define vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x4_t __ret; \ + __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) +#else +#define vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float32x4_t __ret; \ + __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__rev1, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x4_t __ret; \ + __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 34); \ + __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else -#define vrshrq_n_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ +#define vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 34); \ + __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) 
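Editor's note on this hunk: the regenerated `arm_neon.h` replaces many `__ai` (always-inline) functions with statement-expression macros — the `vrsra_n_*` and `vset_lane_*`/`vsetq_lane_*` families above — because their shift amount or lane index (`__p2`) must stay an integer constant expression by the time it reaches the `__builtin_neon_*` call. Each intrinsic also keeps two variants: the `__LITTLE_ENDIAN__` one passes vectors straight through, while the `#else` one reverses lanes with `__builtin_shufflevector` before and after the builtin so big-endian code observes the same lane numbering. Below is a minimal sketch of what the intrinsics touched in this hunk compute, assuming an AArch64 clang target; the file name and expected output are ours, not from the diff:

```c
/* neon_sketch.c -- illustrative only; build with an ARM-target clang, e.g.
 *   clang --target=aarch64-linux-gnu -O2 neon_sketch.c
 */
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  /* vrsra_n: rounding shift right by an immediate, then accumulate.
     Per lane: acc + ((src + (1 << (n-1))) >> n).  Here: 1 + round(7/4) = 3. */
  uint8x8_t rsra = vrsra_n_u8(vdup_n_u8(1), vdup_n_u8(7), 2);

  /* vrsubhn: subtract, round, and keep the high half of each lane.
     Per lane (u32 -> u16): (a - b + 0x8000) >> 16.
     Here: (0x18000 + 0x8000) >> 16 = 2. */
  uint16x4_t subhn = vrsubhn_u32(vdupq_n_u32(0x18000u), vdupq_n_u32(0u));

  /* vset_lane: replace one lane, leave the others untouched.  The lane
     index must be a compile-time constant -- hence macros, not functions. */
  uint32x2_t lanes = vset_lane_u32(42u, vdup_n_u32(0u), 1);   /* {0, 42} */

  /* vshl: per-lane shift by a signed, run-time amount; a negative count
     shifts right.  {16 << 1, 16 >> 2} = {32, 4}. */
  int32x2_t amounts = vset_lane_s32(-2, vdup_n_s32(1), 1);    /* {1, -2} */
  int32x2_t shifted = vshl_s32(vdup_n_s32(16), amounts);

  printf("%u %u %u %u %d %d\n",                /* expect: 3 2 0 42 32 4 */
         (unsigned)vget_lane_u8(rsra, 0), (unsigned)vget_lane_u16(subhn, 0),
         vget_lane_u32(lanes, 0), vget_lane_u32(lanes, 1),
         vget_lane_s32(shifted, 0), vget_lane_s32(shifted, 1));
  return 0;
}
```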
#endif #ifdef __LITTLE_ENDIAN__ -#define vrshrq_n_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ +#define vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64x2_t __s1 = __p1; \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 35); \ + __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else -#define vrshrq_n_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ +#define vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64x2_t __s1 = __p1; \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 35); \ + __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) +#define __noswap_vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64x2_t __s1 = __p1; \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrq_n_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ +#define vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 33); \ + __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else -#define vrshrq_n_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 33); \ + __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshr_n_u8(__p0, __p1) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ +#define vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 16); \ + __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshr_n_u8(__p0, __p1) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 16); \ + __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshr_n_u32(__p0, __p1) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ +#define vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 18); \ + __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshr_n_u32(__p0, __p1) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ +#define vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 18); \ + __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) +#define __noswap_vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshr_n_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ +#define vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64x1_t __s1 = __p1; \ uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 19); \ + __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshr_n_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ +#define vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64x1_t __s1 = __p1; \ uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 19); \ + __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) +#define __noswap_vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64x1_t __s1 = __p1; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshr_n_u16(__p0, __p1) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ +#define vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint16x4_t __s1 = __p1; \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 17); \ + __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshr_n_u16(__p0, __p1) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ +#define vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint16x4_t __s1 = __p1; \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 17); \ + __ret = (uint16x4_t) 
__builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint16x4_t __s1 = __p1; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshr_n_s8(__p0, __p1) __extension__ ({ \ - int8x8_t __s0 = __p0; \ +#define vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8_t __s0 = __p0; \ + int8x8_t __s1 = __p1; \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 0); \ + __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshr_n_s8(__p0, __p1) __extension__ ({ \ - int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8_t __s0 = __p0; \ + int8x8_t __s1 = __p1; \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 0); \ + __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8_t __s0 = __p0; \ + int8x8_t __s1 = __p1; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshr_n_s32(__p0, __p1) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 2); \ +#define vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshr_n_s32(__p0, __p1) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 2); \ +#define vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) +#define __noswap_vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshr_n_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 3); \ +#define vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshr_n_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) 
__builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 3); \ +#define vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__rev1, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshr_n_s16(__p0, __p1) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 1); \ +#define vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64x1_t __s1 = __p1; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshr_n_s16(__p0, __p1) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64x1_t __s1 = __p1; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) +#define __noswap_vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64x1_t __s1 = __p1; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_n_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 17); \ +#define vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else -#define vrshrn_n_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 17); \ +#define vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vrshrn_n_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 17); \ +#define __noswap_vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_n_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) 
__builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 18); \ - __ret; \ -}) +__ai uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + return __ret; +} #else -#define vrshrn_n_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vrshrn_n_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 18); \ - __ret; \ -}) +__ai uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_n_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 16); \ - __ret; \ -}) +__ai uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + return __ret; +} +#else +__ai uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + return __ret; +} +#else +__ai uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + return __ret; +} +#else +__ai uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) { + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) { + 
int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + return __ret; +} +#else +__ai int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + return __ret; +} +#else +__ai int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + return __ret; +} #else -#define vrshrn_n_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vrshrn_n_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 16); \ - __ret; \ -}) +__ai int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_n_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 1); \ - __ret; \ -}) +__ai int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + return __ret; +} #else -#define vrshrn_n_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vrshrn_n_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 1); \ - __ret; \ -}) +__ai int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 
2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_n_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 2); \ - __ret; \ -}) +__ai uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + return __ret; +} #else -#define vrshrn_n_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vrshrn_n_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 2); \ - __ret; \ -}) +__ai uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_n_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 0); \ - __ret; \ -}) +__ai uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + return __ret; +} #else -#define vrshrn_n_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vrshrn_n_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 0); \ - __ret; \ -}) +__ai uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 50); +__ai uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else -__ai uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) 
__builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrsqrteq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 41); +__ai uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else -__ai float32x4_t vrsqrteq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 41); +__ai uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vrsqrte_u32(uint32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 18); +__ai int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else -__ai uint32x2_t vrsqrte_u32(uint32x2_t __p0) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrsqrte_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 9); +__ai int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) { + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else -__ai float32x2_t vrsqrte_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 9); +__ai int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __ret; + __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); +__ai int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = 
(int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #else -__ai float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 9); +__ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else -__ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __ret; + __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vrsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __s0 = __p0; \ - uint8x16_t __s1 = __p1; \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = (uint8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else -#define vrsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __s0 = __p0; \ - uint8x16_t __s1 = __p1; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ + __ret = (uint8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ + __ret = (uint32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else -#define vrsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ uint32x4_t __rev0; 
__rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ + __ret = (uint32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s0 = __p0; \ - uint64x2_t __s1 = __p1; \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ + __ret = (uint64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else -#define vrsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s0 = __p0; \ - uint64x2_t __s1 = __p1; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ + __ret = (uint64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret = (uint16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else -#define vrsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ + __ret = (uint16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __s0 = __p0; \ - int8x16_t __s1 = __p1; \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = (int8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #else -#define vrsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __s0 = __p0; \ - int8x16_t __s1 = __p1; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ + __ret = (int8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ 
__ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret = (int32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else -#define vrsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ + __ret = (int32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ - int64x2_t __s1 = __p1; \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ + __ret = (int64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else -#define vrsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ - int64x2_t __s1 = __p1; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ + __ret = (int64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = (int16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else -#define vrsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ + __ret = (int16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __s0 = __p0; \ - uint8x8_t __s1 = __p1; \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ + __ret = (uint8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #else -#define vrsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __s0 = __p0; \ - uint8x8_t 
__s1 = __p1; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ + __ret = (uint8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ + __ret = (uint32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else -#define vrsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ + __ret = (uint32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret = (uint64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #else -#define vrsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret = (uint64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ + __ret = (uint16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else -#define vrsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ + __ret = (uint16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ + __ret = (int8x8_t) 
__builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #else -#define vrsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ + __ret = (int8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = (int32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else -#define vrsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ + __ret = (int32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret = (int64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #else -#define vrsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret = (int64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = (int16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else -#define vrsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ + __ret = (int16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, 
(int8x16_t)__p1, 17); - return __ret; -} -#else -__ai uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -__ai uint16x4_t __noswap_vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); - return __ret; -} -#else -__ai uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -__ai uint32x2_t __noswap_vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); - return __ret; -} -#else -__ai uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -__ai uint8x8_t __noswap_vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); - return __ret; -} -#else -__ai int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -__ai int16x4_t __noswap_vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { - int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); - return __ret; -} +#define vshll_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) 
__builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 49); \ + __ret; \ +}) #else -__ai int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -__ai int32x2_t __noswap_vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); - return __ret; -} +#define vshll_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vshll_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint16x8_t __ret; \ + __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 49); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); - return __ret; -} +#define vshll_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 51); \ + __ret; \ +}) #else -__ai int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -__ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); - return __ret; -} +#define vshll_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 51); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vshll_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 51); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8_t __s0 = __p0; \ - poly8x8_t __s1 = __p1; \ - poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ +#define vshll_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 50); \ __ret; \ }) #else -#define vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8_t __s0 = __p0; \ - poly8x8_t __s1 = __p1; \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vshll_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 50); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8_t __s0 = __p0; \ - poly8x8_t __s1 = __p1; \ - poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ +#define __noswap_vshll_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint32x4_t __ret; \ + __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 50); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16_t __s0 = __p0; \ - poly16x4_t __s1 = __p1; \ - poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ +#define vshll_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 33); \ __ret; \ }) #else -#define vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16_t __s0 = __p0; \ - poly16x4_t __s1 = __p1; \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vshll_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16_t __s0 = __p0; \ - poly16x4_t __s1 = __p1; \ - poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ +#define __noswap_vshll_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 33); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8_t __s0 = __p0; \ - poly8x16_t __s1 = __p1; \ - poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ +#define vshll_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 35); \ __ret; \ }) #else -#define vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8_t __s0 = __p0; \ - poly8x16_t __s1 = __p1; \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vshll_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 35); \ 
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8_t __s0 = __p0; \ - poly8x16_t __s1 = __p1; \ - poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ +#define __noswap_vshll_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 35); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16_t __s0 = __p0; \ - poly16x8_t __s1 = __p1; \ - poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ +#define vshll_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 34); \ __ret; \ }) #else -#define vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16_t __s0 = __p0; \ - poly16x8_t __s1 = __p1; \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vshll_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 34); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16_t __s0 = __p0; \ - poly16x8_t __s1 = __p1; \ - poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ +#define __noswap_vshll_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 34); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8x16_t __s1 = __p1; \ +#define vshrq_n_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else -#define vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8x16_t __s1 = __p1; \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vshrq_n_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8x16_t __s1 = __p1; \ - uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ +#define vshrq_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else -#define vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ +#define vshrq_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (uint32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64x2_t __s1 = __p1; \ +#define vshrq_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else -#define vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ +#define vshrq_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (uint64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64x2_t __s1 = __p1; \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ +#define vshrq_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else -#define vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vshrq_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (uint16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 49); \ __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8x16_t __s1 = __p1; \ +#define vshrq_n_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret = (int8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #else -#define vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8x16_t __s1 = __p1; \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vshrq_n_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (int8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8x16_t __s1 = __p1; \ - int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__s1, __p2); \ +#define vshrq_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else -#define vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__rev1, __p2); \ +#define vshrq_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = (int32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__s1, __p2); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vshrq_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 35); \ + __ret; \ +}) +#else +#define vshrq_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 35); \ + __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ +#define vshrq_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else -#define vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vshrq_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vshr_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 16); \ + __ret; \ +}) +#else +#define vshr_n_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 16); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64x2_t __s1 = __p1; \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ +#define vshr_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else -#define vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64x2_t __s1 = __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ +#define vshr_n_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) -#define __noswap_vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64x2_t __s1 = __p1; \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __ret; \ - __ret = (int16x8_t) 
__builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ +#define vshr_n_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #else -#define vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vshr_n_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) -#define __noswap_vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vshr_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 17); \ + __ret; \ +}) +#else +#define vshr_n_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8x8_t __s1 = __p1; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ +#define vshr_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #else -#define vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8x8_t __s1 = __p1; \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ +#define vshr_n_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8x8_t __s1 = __p1; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vshr_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 2); \ + __ret; \ +}) +#else +#define vshr_n_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __ret; \ + __ret = (int32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_u32(__p0, __p1, 
__p2) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ +#define vshr_n_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #else -#define vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vshr_n_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) -#define __noswap_vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vshr_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 1); \ + __ret; \ +}) +#else +#define vshr_n_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ +#define vshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else -#define vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ +#define vshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ +#define __noswap_vshrn_n_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ +#define vshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, 
__p1, 18); \ __ret; \ }) #else -#define vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 18); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ +#define __noswap_vshrn_n_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ +#define vshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else -#define vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ +#define vshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ +#define __noswap_vshrn_n_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__s1, __p2); \ +#define vshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #else -#define vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + 
int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__s1, __p2); \ +#define __noswap_vshrn_n_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ +#define vshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ + __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #else -#define vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ +#define vshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__rev1, __p2); \ + __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ +#define __noswap_vshrn_n_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ + __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ +#define vshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #else -#define vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ +#define vshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ +#define __noswap_vshrn_n_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ +#define vsli_n_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x8_t __s0 = __p0; \ + poly8x8_t __s1 = __p1; \ + poly8x8_t __ret; \ + __ret = (poly8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ __ret; \ }) #else -#define vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ +#define vsli_n_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x8_t __s0 = __p0; \ + poly8x8_t __s1 = __p1; \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret; \ + __ret = (poly8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); - return __ret; -} -#else -__ai uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); - return __ret; -} -#else -__ai uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); - return __ret; -} -#else -__ai uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} 
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
-  return __ret;
-}
-#else
-__ai uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) {
-  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
-  return __ret;
-}
-#else
-__ai int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) {
-  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) {
-  int32x4_t __ret;
-  __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
-  return __ret;
-}
-#else
-__ai int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) {
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  int32x4_t __ret;
-  __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) {
-  int64x2_t __ret;
-  __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35);
-  return __ret;
-}
-#else
-__ai int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  int64x2_t __ret;
-  __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
-  return __ret;
-}
-#else
-__ai int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) {
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
-  return __ret;
-}
-#else
-__ai uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) {
-  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
-  return __ret;
-}
-#else
-__ai uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) {
-  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
-  return __ret;
-}
-#else
-__ai uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
-  return __ret;
-}
-#else
-__ai uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) {
-  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) {
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0);
-  return __ret;
-}
-#else
-__ai int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) {
-  int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) {
-  int32x2_t __ret;
-  __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
-  return __ret;
-}
+#define vsli_n_p16(__p0, __p1, __p2) __extension__ ({ \
+  poly16x4_t __s0 = __p0; \
+  poly16x4_t __s1 = __p1; \
+  poly16x4_t __ret; \
+  __ret = (poly16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \
+  __ret; \
+})
 #else
-__ai int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) {
-  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  int32x2_t __ret;
-  __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
+#define vsli_n_p16(__p0, __p1, __p2) __extension__ ({ \
+  poly16x4_t __s0 = __p0; \
+  poly16x4_t __s1 = __p1; \
+  poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  poly16x4_t __ret; \
+  __ret = (poly16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) {
-  int64x1_t __ret;
-  __ret = (int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3);
-  return __ret;
-}
+#define vsliq_n_p8(__p0, __p1, __p2) __extension__ ({ \
+  poly8x16_t __s0 = __p0; \
+  poly8x16_t __s1 = __p1; \
+  poly8x16_t __ret; \
+  __ret = (poly8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \
+  __ret; \
+})
 #else
-__ai int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) {
-  int64x1_t __ret;
-  __ret = (int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3);
-  return __ret;
-}
+#define vsliq_n_p8(__p0, __p1, __p2) __extension__ ({ \
+  poly8x16_t __s0 = __p0; \
+  poly8x16_t __s1 = __p1; \
+  poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly8x16_t __ret; \
+  __ret = (poly8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) {
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
-  return __ret;
-}
+#define vsliq_n_p16(__p0, __p1, __p2) __extension__ ({ \
+  poly16x8_t __s0 = __p0; \
+  poly16x8_t __s1 = __p1; \
+  poly16x8_t __ret; \
+  __ret = (poly16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \
+  __ret; \
+})
 #else
-__ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) {
-  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
+#define vsliq_n_p16(__p0, __p1, __p2) __extension__ ({ \
+  poly16x8_t __s0 = __p0; \
+  poly16x8_t __s1 = __p1; \
+  poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly16x8_t __ret; \
+  __ret = (poly16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshlq_n_u8(__p0, __p1) __extension__ ({ \
+#define vsliq_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x16_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
   uint8x16_t __ret; \
-  __ret = (uint8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 48); \
+  __ret = (uint8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \
   __ret; \
 })
 #else
-#define vshlq_n_u8(__p0, __p1) __extension__ ({ \
+#define vsliq_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x16_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
   uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   uint8x16_t __ret; \
-  __ret = (uint8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 48); \
+  __ret = (uint8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \
   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshlq_n_u32(__p0, __p1) __extension__ ({ \
+#define vsliq_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x4_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
   uint32x4_t __ret; \
-  __ret = (uint32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 50); \
+  __ret = (uint32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \
   __ret; \
 })
 #else
-#define vshlq_n_u32(__p0, __p1) __extension__ ({ \
+#define vsliq_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x4_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
   uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   uint32x4_t __ret; \
-  __ret = (uint32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 50); \
+  __ret = (uint32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshlq_n_u64(__p0, __p1) __extension__ ({ \
+#define vsliq_n_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x2_t __s0 = __p0; \
+  uint64x2_t __s1 = __p1; \
   uint64x2_t __ret; \
-  __ret = (uint64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 51); \
+  __ret = (uint64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \
   __ret; \
 })
 #else
-#define vshlq_n_u64(__p0, __p1) __extension__ ({ \
+#define vsliq_n_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x2_t __s0 = __p0; \
+  uint64x2_t __s1 = __p1; \
   uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   uint64x2_t __ret; \
-  __ret = (uint64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 51); \
+  __ret = (uint64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshlq_n_u16(__p0, __p1) __extension__ ({ \
+#define vsliq_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x8_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
   uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 49); \
+  __ret = (uint16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \
   __ret; \
 })
 #else
-#define vshlq_n_u16(__p0, __p1) __extension__ ({ \
+#define vsliq_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x8_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
   uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 49); \
+  __ret = (uint16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshlq_n_s8(__p0, __p1) __extension__ ({ \
+#define vsliq_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x16_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
   int8x16_t __ret; \
-  __ret = (int8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 32); \
+  __ret = (int8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \
   __ret; \
 })
 #else
-#define vshlq_n_s8(__p0, __p1) __extension__ ({ \
+#define vsliq_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x16_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
   int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   int8x16_t __ret; \
-  __ret = (int8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 32); \
+  __ret = (int8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \
   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshlq_n_s32(__p0, __p1) __extension__ ({ \
+#define vsliq_n_s32(__p0, __p1, __p2) __extension__ ({ \
   int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
   int32x4_t __ret; \
-  __ret = (int32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 34); \
+  __ret = (int32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \
   __ret; \
 })
 #else
-#define vshlq_n_s32(__p0, __p1) __extension__ ({ \
+#define vsliq_n_s32(__p0, __p1, __p2) __extension__ ({ \
   int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
   int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   int32x4_t __ret; \
-  __ret = (int32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 34); \
+  __ret = (int32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshlq_n_s64(__p0, __p1) __extension__ ({ \
+#define vsliq_n_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x2_t __s0 = __p0; \
+  int64x2_t __s1 = __p1; \
   int64x2_t __ret; \
-  __ret = (int64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 35); \
+  __ret = (int64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \
   __ret; \
 })
 #else
-#define vshlq_n_s64(__p0, __p1) __extension__ ({ \
+#define vsliq_n_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x2_t __s0 = __p0; \
+  int64x2_t __s1 = __p1; \
   int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   int64x2_t __ret; \
-  __ret = (int64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 35); \
+  __ret = (int64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshlq_n_s16(__p0, __p1) __extension__ ({ \
+#define vsliq_n_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
   int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 33); \
+  __ret = (int16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \
   __ret; \
 })
 #else
-#define vshlq_n_s16(__p0, __p1) __extension__ ({ \
+#define vsliq_n_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
   int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 33); \
+  __ret = (int16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshl_n_u8(__p0, __p1) __extension__ ({ \
+#define vsli_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x8_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
   uint8x8_t __ret; \
-  __ret = (uint8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 16); \
+  __ret = (uint8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \
   __ret; \
 })
 #else
-#define vshl_n_u8(__p0, __p1) __extension__ ({ \
+#define vsli_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x8_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
   uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   uint8x8_t __ret; \
-  __ret = (uint8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 16); \
+  __ret = (uint8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshl_n_u32(__p0, __p1) __extension__ ({ \
+#define vsli_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x2_t __s0 = __p0; \
+  uint32x2_t __s1 = __p1; \
   uint32x2_t __ret; \
-  __ret = (uint32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 18); \
+  __ret = (uint32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \
   __ret; \
 })
 #else
-#define vshl_n_u32(__p0, __p1) __extension__ ({ \
+#define vsli_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x2_t __s0 = __p0; \
+  uint32x2_t __s1 = __p1; \
   uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   uint32x2_t __ret; \
-  __ret = (uint32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 18); \
+  __ret = (uint32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshl_n_u64(__p0, __p1) __extension__ ({ \
+#define vsli_n_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x1_t __s0 = __p0; \
+  uint64x1_t __s1 = __p1; \
   uint64x1_t __ret; \
-  __ret = (uint64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 19); \
+  __ret = (uint64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
   __ret; \
 })
 #else
-#define vshl_n_u64(__p0, __p1) __extension__ ({ \
+#define vsli_n_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x1_t __s0 = __p0; \
+  uint64x1_t __s1 = __p1; \
   uint64x1_t __ret; \
-  __ret = (uint64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 19); \
+  __ret = (uint64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshl_n_u16(__p0, __p1) __extension__ ({ \
+#define vsli_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x4_t __s0 = __p0; \
+  uint16x4_t __s1 = __p1; \
   uint16x4_t __ret; \
-  __ret = (uint16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 17); \
+  __ret = (uint16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \
   __ret; \
 })
 #else
-#define vshl_n_u16(__p0, __p1) __extension__ ({ \
+#define vsli_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x4_t __s0 = __p0; \
+  uint16x4_t __s1 = __p1; \
   uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   uint16x4_t __ret; \
-  __ret = (uint16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 17); \
+  __ret = (uint16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshl_n_s8(__p0, __p1) __extension__ ({ \
+#define vsli_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x8_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
   int8x8_t __ret; \
-  __ret = (int8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 0); \
+  __ret = (int8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \
   __ret; \
 })
 #else
-#define vshl_n_s8(__p0, __p1) __extension__ ({ \
+#define vsli_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x8_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
   int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   int8x8_t __ret; \
-  __ret = (int8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 0); \
+  __ret = (int8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshl_n_s32(__p0, __p1) __extension__ ({ \
+#define vsli_n_s32(__p0, __p1, __p2) __extension__ ({ \
   int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
   int32x2_t __ret; \
-  __ret = (int32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 2); \
+  __ret = (int32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \
   __ret; \
 })
 #else
-#define vshl_n_s32(__p0, __p1) __extension__ ({ \
+#define vsli_n_s32(__p0, __p1, __p2) __extension__ ({ \
   int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
   int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   int32x2_t __ret; \
-  __ret = (int32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 2); \
+  __ret = (int32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshl_n_s64(__p0, __p1) __extension__ ({ \
+#define vsli_n_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x1_t __s0 = __p0; \
+  int64x1_t __s1 = __p1; \
   int64x1_t __ret; \
-  __ret = (int64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 3); \
+  __ret = (int64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
   __ret; \
 })
 #else
-#define vshl_n_s64(__p0, __p1) __extension__ ({ \
+#define vsli_n_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x1_t __s0 = __p0; \
+  int64x1_t __s1 = __p1; \
   int64x1_t __ret; \
-  __ret = (int64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 3); \
+  __ret = (int64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshl_n_s16(__p0, __p1) __extension__ ({ \
+#define vsli_n_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
   int16x4_t __ret; \
-  __ret = (int16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 1); \
+  __ret = (int16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \
   __ret; \
 })
 #else
-#define vshl_n_s16(__p0, __p1) __extension__ ({ \
+#define vsli_n_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
   int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   int16x4_t __ret; \
-  __ret = (int16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 1); \
+  __ret = (int16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshll_n_u8(__p0, __p1) __extension__ ({ \
-  uint8x8_t __s0 = __p0; \
+#define vsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \
+  uint8x16_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
+  uint8x16_t __ret; \
+  __ret = (uint8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \
+  __ret; \
+})
+#else
+#define vsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \
+  uint8x16_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
+  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x16_t __ret; \
+  __ret = (uint8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
+  uint32x4_t __ret; \
+  __ret = (uint32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \
+  __ret; \
+})
+#else
+#define vsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  uint32x4_t __ret; \
+  __ret = (uint32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \
+  uint64x2_t __s0 = __p0; \
+  uint64x2_t __s1 = __p1; \
+  uint64x2_t __ret; \
+  __ret = (uint64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \
+  __ret; \
+})
+#else
+#define vsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \
+  uint64x2_t __s0 = __p0; \
+  uint64x2_t __s1 = __p1; \
+  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  uint64x2_t __ret; \
+  __ret = (uint64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \
+  uint16x8_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
   uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 49); \
+  __ret = (uint16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \
   __ret; \
 })
 #else
-#define vshll_n_u8(__p0, __p1) __extension__ ({ \
-  uint8x8_t __s0 = __p0; \
-  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+#define vsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \
+  uint16x8_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 49); \
+  __ret = (uint16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
-#define __noswap_vshll_n_u8(__p0, __p1) __extension__ ({ \
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \
+  int8x16_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
+  int8x16_t __ret; \
+  __ret = (int8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \
+  __ret; \
+})
+#else
+#define vsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \
+  int8x16_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
+  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __ret; \
+  __ret = (int8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x4_t __ret; \
+  __ret = (int32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \
+  __ret; \
+})
+#else
+#define vsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+  __ret = (int32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \
+  int64x2_t __s0 = __p0; \
+  int64x2_t __s1 = __p1; \
+  int64x2_t __ret; \
+  __ret = (int64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \
+  __ret; \
+})
+#else
+#define vsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \
+  int64x2_t __s0 = __p0; \
+  int64x2_t __s1 = __p1; \
+  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  int64x2_t __ret; \
+  __ret = (int64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x8_t __ret; \
+  __ret = (int16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \
+  __ret; \
+})
+#else
+#define vsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __ret; \
+  __ret = (int16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsra_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x8_t __s0 = __p0; \
-  uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 49); \
+  uint8x8_t __s1 = __p1; \
+  uint8x8_t __ret; \
+  __ret = (uint8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \
+  __ret; \
+})
+#else
+#define vsra_n_u8(__p0, __p1, __p2) __extension__ ({ \
+  uint8x8_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
+  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x8_t __ret; \
+  __ret = (uint8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshll_n_u32(__p0, __p1) __extension__ ({ \
+#define vsra_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x2_t __s0 = __p0; \
-  uint64x2_t __ret; \
-  __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 51); \
+  uint32x2_t __s1 = __p1; \
+  uint32x2_t __ret; \
+  __ret = (uint32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \
   __ret; \
 })
 #else
-#define vshll_n_u32(__p0, __p1) __extension__ ({ \
+#define vsra_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x2_t __s0 = __p0; \
+  uint32x2_t __s1 = __p1; \
   uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
-  uint64x2_t __ret; \
-  __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 51); \
+  uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  uint32x2_t __ret; \
+  __ret = (uint32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
-#define __noswap_vshll_n_u32(__p0, __p1) __extension__ ({ \
-  uint32x2_t __s0 = __p0; \
-  uint64x2_t __ret; \
-  __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 51); \
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsra_n_u64(__p0, __p1, __p2) __extension__ ({ \
+  uint64x1_t __s0 = __p0; \
+  uint64x1_t __s1 = __p1; \
+  uint64x1_t __ret; \
+  __ret = (uint64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
+  __ret; \
+})
+#else
+#define vsra_n_u64(__p0, __p1, __p2) __extension__ ({ \
+  uint64x1_t __s0 = __p0; \
+  uint64x1_t __s1 = __p1; \
+  uint64x1_t __ret; \
+  __ret = (uint64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshll_n_u16(__p0, __p1) __extension__ ({ \
+#define vsra_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x4_t __s0 = __p0; \
-  uint32x4_t __ret; \
-  __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 50); \
+  uint16x4_t __s1 = __p1; \
+  uint16x4_t __ret; \
+  __ret = (uint16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \
   __ret; \
 })
 #else
-#define vshll_n_u16(__p0, __p1) __extension__ ({ \
+#define vsra_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x4_t __s0 = __p0; \
+  uint16x4_t __s1 = __p1; \
   uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  uint32x4_t __ret; \
-  __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 50); \
+  uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  uint16x4_t __ret; \
+  __ret = (uint16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
-#define __noswap_vshll_n_u16(__p0, __p1) __extension__ ({ \
-  uint16x4_t __s0 = __p0; \
-  uint32x4_t __ret; \
-  __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 50); \
-  __ret; \
-})
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshll_n_s8(__p0, __p1) __extension__ ({ \
+#define vsra_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x8_t __s0 = __p0; \
-  int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 33); \
+  int8x8_t __s1 = __p1; \
+  int8x8_t __ret; \
+  __ret = (int8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \
   __ret; \
 })
 #else
-#define vshll_n_s8(__p0, __p1) __extension__ ({ \
+#define vsra_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x8_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
   int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 33); \
+  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x8_t __ret; \
+  __ret = (int8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
-#define __noswap_vshll_n_s8(__p0, __p1) __extension__ ({ \
-  int8x8_t __s0 = __p0; \
-  int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 33); \
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsra_n_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x2_t __ret; \
+  __ret = (int32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \
+  __ret; \
+})
+#else
+#define vsra_n_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  int32x2_t __ret; \
+  __ret = (int32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsra_n_s64(__p0, __p1, __p2) __extension__ ({ \
+  int64x1_t __s0 = __p0; \
+  int64x1_t __s1 = __p1; \
+  int64x1_t __ret; \
+  __ret = (int64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
+  __ret; \
+})
+#else
+#define vsra_n_s64(__p0, __p1, __p2) __extension__ ({ \
+  int64x1_t __s0 = __p0; \
+  int64x1_t __s1 = __p1; \
+  int64x1_t __ret; \
+  __ret = (int64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsra_n_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x4_t __ret; \
+  __ret = (int16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \
+  __ret; \
+})
+#else
+#define vsra_n_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int16x4_t __ret; \
+  __ret = (int16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsri_n_p8(__p0, __p1, __p2) __extension__ ({ \
+  poly8x8_t __s0 = __p0; \
+  poly8x8_t __s1 = __p1; \
+  poly8x8_t __ret; \
+  __ret = (poly8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \
+  __ret; \
+})
+#else
+#define vsri_n_p8(__p0, __p1, __p2) __extension__ ({ \
+  poly8x8_t __s0 = __p0; \
+  poly8x8_t __s1 = __p1; \
+  poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly8x8_t __ret; \
+  __ret = (poly8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshll_n_s32(__p0, __p1) __extension__ ({ \
-  int32x2_t __s0 = __p0; \
-  int64x2_t __ret; \
-  __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 35); \
+#define vsri_n_p16(__p0, __p1, __p2) __extension__ ({ \
+  poly16x4_t __s0 = __p0; \
+  poly16x4_t __s1 = __p1; \
+  poly16x4_t __ret; \
+  __ret = (poly16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \
   __ret; \
 })
 #else
-#define vshll_n_s32(__p0, __p1) __extension__ ({ \
-  int32x2_t __s0 = __p0; \
-  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
-  int64x2_t __ret; \
-  __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 35); \
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
-  __ret; \
-})
-#define __noswap_vshll_n_s32(__p0, __p1) __extension__ ({ \
-  int32x2_t __s0 = __p0; \
-  int64x2_t __ret; \
-  __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 35); \
+#define vsri_n_p16(__p0, __p1, __p2) __extension__ ({ \
+  poly16x4_t __s0 = __p0; \
+  poly16x4_t __s1 = __p1; \
+  poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  poly16x4_t __ret; \
+  __ret = (poly16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshll_n_s16(__p0, __p1) __extension__ ({ \
-  int16x4_t __s0 = __p0; \
-  int32x4_t __ret; \
-  __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 34); \
+#define vsriq_n_p8(__p0, __p1, __p2) __extension__ ({ \
+  poly8x16_t __s0 = __p0; \
+  poly8x16_t __s1 = __p1; \
+  poly8x16_t __ret; \
+  __ret = (poly8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \
   __ret; \
 })
 #else
-#define vshll_n_s16(__p0, __p1) __extension__ ({ \
-  int16x4_t __s0 = __p0; \
-  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  int32x4_t __ret; \
-  __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 34); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+#define vsriq_n_p8(__p0, __p1, __p2) __extension__ ({ \
+  poly8x16_t __s0 = __p0; \
+  poly8x16_t __s1 = __p1; \
+  poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly8x16_t __ret; \
+  __ret = (poly8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
-#define __noswap_vshll_n_s16(__p0, __p1) __extension__ ({ \
-  int16x4_t __s0 = __p0; \
-  int32x4_t __ret; \
-  __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 34); \
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsriq_n_p16(__p0, __p1, __p2) __extension__ ({ \
+  poly16x8_t __s0 = __p0; \
+  poly16x8_t __s1 = __p1; \
+  poly16x8_t __ret; \
+  __ret = (poly16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \
+  __ret; \
+})
+#else
+#define vsriq_n_p16(__p0, __p1, __p2) __extension__ ({ \
+  poly16x8_t __s0 = __p0; \
+  poly16x8_t __s1 = __p1; \
+  poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  poly16x8_t __ret; \
+  __ret = (poly16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshrq_n_u8(__p0, __p1) __extension__ ({ \
+#define vsriq_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x16_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
   uint8x16_t __ret; \
-  __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 48); \
+  __ret = (uint8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \
   __ret; \
 })
 #else
-#define vshrq_n_u8(__p0, __p1) __extension__ ({ \
+#define vsriq_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x16_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
   uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   uint8x16_t __ret; \
-  __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 48); \
+  __ret = (uint8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \
   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshrq_n_u32(__p0, __p1) __extension__ ({ \
+#define vsriq_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x4_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
   uint32x4_t __ret; \
-  __ret = (uint32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 50); \
+  __ret = (uint32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \
   __ret; \
 })
 #else
-#define vshrq_n_u32(__p0, __p1) __extension__ ({ \
+#define vsriq_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x4_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
   uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   uint32x4_t __ret; \
-  __ret = (uint32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 50); \
+  __ret = (uint32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshrq_n_u64(__p0, __p1) __extension__ ({ \
+#define vsriq_n_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x2_t __s0 = __p0; \
+  uint64x2_t __s1 = __p1; \
   uint64x2_t __ret; \
-  __ret = (uint64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 51); \
+  __ret = (uint64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \
   __ret; \
 })
 #else
-#define vshrq_n_u64(__p0, __p1) __extension__ ({ \
+#define vsriq_n_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x2_t __s0 = __p0; \
+  uint64x2_t __s1 = __p1; \
   uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   uint64x2_t __ret; \
-  __ret = (uint64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 51); \
+  __ret = (uint64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshrq_n_u16(__p0, __p1) __extension__ ({ \
+#define vsriq_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x8_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
   uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 49); \
+  __ret = (uint16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \
   __ret; \
 })
 #else
-#define vshrq_n_u16(__p0, __p1) __extension__ ({ \
+#define vsriq_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x8_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
   uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 49); \
+  __ret = (uint16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshrq_n_s8(__p0, __p1) __extension__ ({ \
+#define vsriq_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x16_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
   int8x16_t __ret; \
-  __ret = (int8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 32); \
+  __ret = (int8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \
   __ret; \
 })
 #else
-#define vshrq_n_s8(__p0, __p1) __extension__ ({ \
+#define vsriq_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x16_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
   int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   int8x16_t __ret; \
-  __ret = (int8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 32); \
+  __ret = (int8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \
   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshrq_n_s32(__p0, __p1) __extension__ ({ \
+#define vsriq_n_s32(__p0, __p1, __p2) __extension__ ({ \
   int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
   int32x4_t __ret; \
-  __ret = (int32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 34); \
+  __ret = (int32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \
   __ret; \
 })
 #else
-#define vshrq_n_s32(__p0, __p1) __extension__ ({ \
+#define vsriq_n_s32(__p0, __p1, __p2) __extension__ ({ \
   int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
   int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   int32x4_t __ret; \
-  __ret = (int32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 34); \
+  __ret = (int32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshrq_n_s64(__p0, __p1) __extension__ ({ \
+#define vsriq_n_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x2_t __s0 = __p0; \
+  int64x2_t __s1 = __p1; \
   int64x2_t __ret; \
-  __ret = (int64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 35); \
+  __ret = (int64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \
   __ret; \
 })
 #else
-#define vshrq_n_s64(__p0, __p1) __extension__ ({ \
+#define vsriq_n_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x2_t __s0 = __p0; \
+  int64x2_t __s1 = __p1; \
   int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   int64x2_t __ret; \
-  __ret = (int64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 35); \
+  __ret = (int64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshrq_n_s16(__p0, __p1) __extension__ ({ \
+#define vsriq_n_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
   int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 33); \
+  __ret = (int16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \
   __ret; \
 })
 #else
-#define vshrq_n_s16(__p0, __p1) __extension__ ({ \
+#define vsriq_n_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
   int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 33); \
+  __ret = (int16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshr_n_u8(__p0, __p1) __extension__ ({ \
+#define vsri_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x8_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
   uint8x8_t __ret; \
-  __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 16); \
+  __ret = (uint8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \
   __ret; \
 })
 #else
-#define vshr_n_u8(__p0, __p1) __extension__ ({ \
+#define vsri_n_u8(__p0, __p1, __p2) __extension__ ({ \
   uint8x8_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
   uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   uint8x8_t __ret; \
-  __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 16); \
+  __ret = (uint8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshr_n_u32(__p0, __p1) __extension__ ({ \
+#define vsri_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x2_t __s0 = __p0; \
+  uint32x2_t __s1 = __p1; \
   uint32x2_t __ret; \
-  __ret = (uint32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 18); \
+  __ret = (uint32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \
   __ret; \
 })
 #else
-#define vshr_n_u32(__p0, __p1) __extension__ ({ \
+#define vsri_n_u32(__p0, __p1, __p2) __extension__ ({ \
   uint32x2_t __s0 = __p0; \
+  uint32x2_t __s1 = __p1; \
   uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   uint32x2_t __ret; \
-  __ret = (uint32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 18); \
+  __ret = (uint32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshr_n_u64(__p0, __p1) __extension__ ({ \
+#define vsri_n_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x1_t __s0 = __p0; \
+  uint64x1_t __s1 = __p1; \
   uint64x1_t __ret; \
-  __ret = (uint64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 19); \
+  __ret = (uint64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
   __ret; \
 })
 #else
-#define vshr_n_u64(__p0, __p1) __extension__ ({ \
+#define vsri_n_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x1_t __s0 = __p0; \
+  uint64x1_t __s1 = __p1; \
   uint64x1_t __ret; \
-  __ret = (uint64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 19); \
+  __ret = (uint64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshr_n_u16(__p0, __p1) __extension__ ({ \
+#define vsri_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x4_t __s0 = __p0; \
+  uint16x4_t __s1 = __p1; \
   uint16x4_t __ret; \
-  __ret = (uint16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 17); \
+  __ret = (uint16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \
   __ret; \
 })
 #else
-#define vshr_n_u16(__p0, __p1) __extension__ ({ \
+#define vsri_n_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x4_t __s0 = __p0; \
+  uint16x4_t __s1 = __p1; \
   uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   uint16x4_t __ret; \
-  __ret = (uint16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 17); \
+  __ret = (uint16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshr_n_s8(__p0, __p1) __extension__ ({ \
+#define vsri_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x8_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
   int8x8_t __ret; \
-  __ret = (int8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 0); \
+  __ret = (int8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \
   __ret; \
 })
 #else
-#define vshr_n_s8(__p0, __p1) __extension__ ({ \
+#define vsri_n_s8(__p0, __p1, __p2) __extension__ ({ \
   int8x8_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
   int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   int8x8_t __ret; \
-  __ret = (int8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 0); \
+  __ret = (int8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshr_n_s32(__p0, __p1) __extension__ ({ \
+#define vsri_n_s32(__p0, __p1, __p2) __extension__ ({ \
   int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
   int32x2_t __ret; \
-  __ret = (int32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 2); \
+  __ret = (int32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \
   __ret; \
 })
 #else
-#define vshr_n_s32(__p0, __p1) __extension__ ({ \
+#define vsri_n_s32(__p0, __p1, __p2) __extension__ ({ \
   int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
   int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   int32x2_t __ret; \
-  __ret = (int32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 2); \
+  __ret = (int32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshr_n_s64(__p0, __p1) __extension__ ({ \
+#define vsri_n_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x1_t __s0 = __p0; \
+  int64x1_t __s1 = __p1; \
   int64x1_t __ret; \
-  __ret = (int64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 3); \
+  __ret = (int64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
   __ret; \
 })
 #else
-#define vshr_n_s64(__p0, __p1) __extension__ ({ \
+#define vsri_n_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x1_t __s0 = __p0; \
+  int64x1_t __s1 = __p1; \
   int64x1_t __ret; \
-  __ret = (int64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 3); \
+  __ret = (int64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vshr_n_s16(__p0, __p1) __extension__ ({ \
+#define vsri_n_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
   int16x4_t __ret; \
-  __ret = (int16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 1); \
+  __ret = (int16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \
   __ret; \
 })
 #else
-#define vshr_n_s16(__p0, __p1) __extension__ ({ \
+#define vsri_n_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
   int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   int16x4_t __ret; \
-  __ret = (int16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 1); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vshrn_n_u32(__p0, __p1) __extension__ ({ \
-  uint32x4_t __s0 = __p0; \
-  uint16x4_t __ret; \
-  __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 17); \
-  __ret; \
-})
-#else
-#define vshrn_n_u32(__p0, __p1) __extension__ ({ \
-  uint32x4_t __s0 = __p0; \
-  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  uint16x4_t __ret; \
__builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vshrn_n_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 17); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vshrn_n_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 18); \ - __ret; \ -}) -#else -#define vshrn_n_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vshrn_n_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 18); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vshrn_n_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 16); \ - __ret; \ -}) -#else -#define vshrn_n_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vshrn_n_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 16); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vshrn_n_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 1); \ - __ret; \ -}) -#else -#define vshrn_n_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 1); \ + __ret = (int16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) -#define __noswap_vshrn_n_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vshrn_n_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 2); \ - __ret; \ -}) -#else -#define vshrn_n_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vshrn_n_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 2); \ - __ret; \ -}) -#endif - -#ifdef 
__LITTLE_ENDIAN__ -#define vshrn_n_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 0); \ - __ret; \ -}) -#else -#define vshrn_n_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vshrn_n_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 0); \ - __ret; \ -}) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8_t __s0 = __p0; \ +#define vst1_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __s1 = __p1; \ - poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 4); \ }) #else -#define vsli_n_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8_t __s0 = __p0; \ +#define vst1_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __s1 = __p1; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4_t __s0 = __p0; \ +#define vst1_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __s1 = __p1; \ - poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 5); \ }) #else -#define vsli_n_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4_t __s0 = __p0; \ +#define vst1_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __s1 = __p1; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ +#define vst1q_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __s1 = __p1; \ - poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 36); \ }) #else -#define vsliq_n_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ +#define vst1q_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret; \ - __ret = (poly8x16_t) 
__builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ +#define vst1q_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __s1 = __p1; \ - poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 37); \ }) #else -#define vsliq_n_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ +#define vst1q_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __s1 = __p1; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ +#define vst1q_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __s1 = __p1; \ - uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 48); \ }) #else -#define vsliq_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ +#define vst1q_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ +#define vst1q_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s1 = __p1; \ - uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 50); \ }) #else -#define vsliq_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ +#define vst1q_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ +#define vst1q_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) 
__builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 51); \ }) #else -#define vsliq_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ +#define vst1q_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ +#define vst1q_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s1 = __p1; \ - uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 49); \ }) #else -#define vsliq_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ +#define vst1q_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x16_t __s0 = __p0; \ +#define vst1q_s8(__p0, __p1) __extension__ ({ \ int8x16_t __s1 = __p1; \ - int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 32); \ }) #else -#define vsliq_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x16_t __s0 = __p0; \ +#define vst1q_s8(__p0, __p1) __extension__ ({ \ int8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ +#define vst1q_f32(__p0, __p1) __extension__ ({ \ + float32x4_t __s1 = __p1; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 41); \ +}) +#else +#define vst1q_f32(__p0, __p1) __extension__ ({ \ + float32x4_t __s1 = __p1; \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 41); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s1 = __p1; \ - int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 34); 
\ }) #else -#define vsliq_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ +#define vst1q_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x2_t __s0 = __p0; \ +#define vst1q_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s1 = __p1; \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 35); \ }) #else -#define vsliq_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x2_t __s0 = __p0; \ +#define vst1q_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s1 = __p1; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ +#define vst1q_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s1 = __p1; \ - int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 33); \ }) #else -#define vsliq_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ +#define vst1q_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ +#define vst1_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __s1 = __p1; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 16); \ }) #else -#define vsli_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ +#define vst1_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_u32(__p0, __p1, __p2) 
__extension__ ({ \ - uint32x2_t __s0 = __p0; \ +#define vst1_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s1 = __p1; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 18); \ }) #else -#define vsli_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ +#define vst1_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ +#define vst1_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 19); \ }) #else -#define vsli_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ +#define vst1_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 19); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ +#define vst1_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s1 = __p1; \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 17); \ }) #else -#define vsli_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ +#define vst1_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8_t __s0 = __p0; \ +#define vst1_s8(__p0, __p1) __extension__ ({ \ int8x8_t __s1 = __p1; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 0); \ }) #else -#define vsli_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8_t __s0 = __p0; \ +#define vst1_s8(__p0, __p1) __extension__ ({ \ int8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_s32(__p0, __p1, 
__p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ +#define vst1_f32(__p0, __p1) __extension__ ({ \ + float32x2_t __s1 = __p1; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 9); \ +}) +#else +#define vst1_f32(__p0, __p1) __extension__ ({ \ + float32x2_t __s1 = __p1; \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 9); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s1 = __p1; \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 2); \ }) #else -#define vsli_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ +#define vst1_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ +#define vst1_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 3); \ }) #else -#define vsli_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ +#define vst1_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 3); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ +#define vst1_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s1 = __p1; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 1); \ }) #else -#define vsli_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ +#define vst1_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ +#define vst1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x8_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 4); \ +}) +#else +#define vst1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x8_t __s1 = __p1; \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 4); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x4_t __s1 = __p1; \ + 
__builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 5); \ +}) +#else +#define vst1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x4_t __s1 = __p1; \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 5); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __s1 = __p1; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 36); \ +}) +#else +#define vst1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __s1 = __p1; \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 36); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __s1 = __p1; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 37); \ +}) +#else +#define vst1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __s1 = __p1; \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 37); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __s1 = __p1; \ - uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 48); \ }) #else -#define vsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ +#define vst1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ +#define vst1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __s1 = __p1; \ - uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 50); \ }) #else -#define vsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ +#define vst1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ +#define vst1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) 
__builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 51); \ }) #else -#define vsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ +#define vst1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ +#define vst1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __s1 = __p1; \ - uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 49); \ }) #else -#define vsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ +#define vst1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x16_t __s0 = __p0; \ +#define vst1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __s1 = __p1; \ - int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 32); \ }) #else -#define vsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x16_t __s0 = __p0; \ +#define vst1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ +#define vst1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x4_t __s1 = __p1; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 41); \ +}) +#else +#define vst1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x4_t __s1 = __p1; \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 41); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s1 = __p1; \ - 
int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 34); \ }) #else -#define vsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ +#define vst1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x2_t __s0 = __p0; \ +#define vst1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 35); \ }) #else -#define vsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x2_t __s0 = __p0; \ +#define vst1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ +#define vst1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __s1 = __p1; \ - int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 33); \ }) #else -#define vsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ +#define vst1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ +#define vst1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __s1 = __p1; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ - __ret; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 16); \ }) #else -#define vsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ +#define vst1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - 
uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ +#define vst1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __s1 = __p1; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ - __ret; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 18); \ }) #else -#define vsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ +#define vst1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ +#define vst1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ }) #else -#define vsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ +#define vst1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ - __ret; \ -}) -#else -#define vsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ - __ret; \ -}) -#else -#define vsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -#define vsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ - __ret; \ +#define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 17); \ }) #else -#define vsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ +#define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4_t __s1 = __p1; \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ +#define vst1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x8_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 0); \ }) #else -#define vsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ +#define vst1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x8_t __s1 = __p1; \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ - __ret; \ +#define vst1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x2_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 9); \ }) #else -#define vsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ +#define vst1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ + float32x2_t __s1 = __p1; \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8_t __s0 = __p0; \ - poly8x8_t __s1 = __p1; \ - poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ - __ret; \ +#define vst1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 2); \ }) #else -#define 
vsri_n_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8_t __s0 = __p0; \ - poly8x8_t __s1 = __p1; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s1 = __p1; \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4_t __s0 = __p0; \ - poly16x4_t __s1 = __p1; \ - poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \ - __ret; \ +#define vst1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ + int64x1_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ }) #else -#define vsri_n_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4_t __s0 = __p0; \ - poly16x4_t __s1 = __p1; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ +#define vst1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ + int64x1_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ - poly8x16_t __s1 = __p1; \ - poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ - __ret; \ +#define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 1); \ }) #else -#define vsriq_n_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ - poly8x16_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s1 = __p1; \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ - poly16x8_t __s1 = __p1; \ - poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ - __ret; \ +#define vst1_p8_x2(__p0, __p1) __extension__ ({ \ + poly8x8x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 4); \ }) #else -#define vsriq_n_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8_t __s0 = 
__p0; \ - poly16x8_t __s1 = __p1; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1_p8_x2(__p0, __p1) __extension__ ({ \ + poly8x8x2_t __s1 = __p1; \ + poly8x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8x16_t __s1 = __p1; \ - uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ - __ret; \ +#define vst1_p16_x2(__p0, __p1) __extension__ ({ \ + poly16x4x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 5); \ }) #else -#define vsriq_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1_p16_x2(__p0, __p1) __extension__ ({ \ + poly16x4x2_t __s1 = __p1; \ + poly16x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ - __ret; \ +#define vst1q_p8_x2(__p0, __p1) __extension__ ({ \ + poly8x16x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 36); \ }) #else -#define vsriq_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ +#define vst1q_p8_x2(__p0, __p1) __extension__ ({ \ + poly8x16x2_t __s1 = __p1; \ + poly8x16x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, 
(int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __s1 = __p1; \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ - __ret; \ +#define vst1q_p16_x2(__p0, __p1) __extension__ ({ \ + poly16x8x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 37); \ }) #else -#define vsriq_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ +#define vst1q_p16_x2(__p0, __p1) __extension__ ({ \ + poly16x8x2_t __s1 = __p1; \ + poly16x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ - __ret; \ +#define vst1q_u8_x2(__p0, __p1) __extension__ ({ \ + uint8x16x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 48); \ }) #else -#define vsriq_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1q_u8_x2(__p0, __p1) __extension__ ({ \ + uint8x16x2_t __s1 = __p1; \ + uint8x16x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8x16_t __s1 = __p1; \ - int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ - __ret; \ +#define vst1q_u32_x2(__p0, __p1) __extension__ ({ \ + uint32x4x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 50); \ }) #else -#define vsriq_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 
8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1q_u32_x2(__p0, __p1) __extension__ ({ \ + uint32x4x2_t __s1 = __p1; \ + uint32x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ - __ret; \ +#define vst1q_u64_x2(__p0, __p1) __extension__ ({ \ + uint64x2x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 51); \ }) #else -#define vsriq_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ +#define vst1q_u64_x2(__p0, __p1) __extension__ ({ \ + uint64x2x2_t __s1 = __p1; \ + uint64x2x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __s1 = __p1; \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ - __ret; \ +#define vst1q_u16_x2(__p0, __p1) __extension__ ({ \ + uint16x8x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 49); \ }) #else -#define vsriq_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __s1 = __p1; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ +#define vst1q_u16_x2(__p0, __p1) __extension__ ({ \ + uint16x8x2_t __s1 = __p1; \ + uint16x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsriq_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ - __ret; \ +#define vst1q_s8_x2(__p0, __p1) __extension__ ({ \ + int8x16x2_t __s1 = __p1; \ + 
__builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 32); \ }) #else -#define vsriq_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1q_s8_x2(__p0, __p1) __extension__ ({ \ + int8x16x2_t __s1 = __p1; \ + int8x16x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ - uint8x8_t __s1 = __p1; \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ - __ret; \ +#define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ + float32x4x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 41); \ }) #else -#define vsri_n_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ - uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ + float32x4x2_t __s1 = __p1; \ + float32x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ - __ret; \ +#define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ + int32x4x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 34); \ }) #else -#define vsri_n_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ +#define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ + int32x4x2_t __s1 = __p1; \ + int32x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 34); \ }) #endif 
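// A minimal usage sketch of the wrapper families touched by the hunks above,
// assuming a 32-bit ARM target with NEON. It only exercises names that appear
// in these hunks (vsri_n_u32, vst1_u32, vst1q_s32_x2). The lane values and a
// build invocation such as `clang --target=armv7-linux-gnueabihf -mfpu=neon`
// are illustrative assumptions, not anything this patch prescribes.
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    // vsri_n_u32(a, b, 8) is "shift right and insert": each result lane
    // keeps the top 8 bits of a and receives b >> 8 in the low 24 bits.
    uint32x2_t a = vdup_n_u32(0xFF000000u);
    uint32x2_t b = vdup_n_u32(0xABCDEF12u);
    uint32x2_t r = vsri_n_u32(a, b, 8);            // 0xFFABCDEF in both lanes

    uint32_t out[2];
    vst1_u32(out, r);                              // single-register store
    printf("0x%08X 0x%08X\n", (unsigned)out[0], (unsigned)out[1]);

    // vst1q_s32_x2 is one of the two-register store variants these hunks
    // add: it writes val[0] then val[1] to consecutive memory.
    int32x4x2_t pair;
    pair.val[0] = vdupq_n_s32(1);
    pair.val[1] = vdupq_n_s32(2);
    int32_t buf[8];
    vst1q_s32_x2(buf, pair);
    return 0;
}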
#ifdef __LITTLE_ENDIAN__ -#define vsri_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ +#define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ + int64x2x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 35); \ }) #else -#define vsri_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ +#define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ + int64x2x2_t __s1 = __p1; \ + int64x2x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ - __ret; \ +#define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ + int16x8x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 33); \ }) #else -#define vsri_n_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ +#define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ + int16x8x2_t __s1 = __p1; \ + int16x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ - __ret; \ +#define vst1_u8_x2(__p0, __p1) __extension__ ({ \ + uint8x8x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 16); \ }) #else -#define vsri_n_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8_t __s0 = __p0; \ - int8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vst1_u8_x2(__p0, __p1) __extension__ ({ \ + uint8x8x2_t __s1 = __p1; \ + uint8x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], 
(int8x8_t)__rev1.val[1], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ - __ret; \ +#define vst1_u32_x2(__p0, __p1) __extension__ ({ \ + uint32x2x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 18); \ }) #else -#define vsri_n_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ +#define vst1_u32_x2(__p0, __p1) __extension__ ({ \ + uint32x2x2_t __s1 = __p1; \ + uint32x2x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ +#define vst1_u64_x2(__p0, __p1) __extension__ ({ \ + uint64x1x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ }) #else -#define vsri_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ +#define vst1_u64_x2(__p0, __p1) __extension__ ({ \ + uint64x1x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsri_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ - __ret; \ +#define vst1_u16_x2(__p0, __p1) __extension__ ({ \ + uint16x4x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 17); \ +}) +#else +#define vst1_u16_x2(__p0, __p1) __extension__ ({ \ + uint16x4x2_t __s1 = __p1; \ + uint16x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 17); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1_s8_x2(__p0, __p1) __extension__ ({ \ + int8x8x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 0); \ }) #else -#define vsri_n_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ +#define vst1_s8_x2(__p0, __p1) __extension__ ({ \ + int8x8x2_t __s1 = __p1; \ + int8x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_p8(__p0, __p1) __extension__ ({ \ - poly8x8_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 4); \ +#define vst1_f32_x2(__p0, __p1) __extension__ ({ \ + float32x2x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 9); \ }) #else -#define vst1_p8(__p0, __p1) __extension__ ({ \ - poly8x8_t __s1 = __p1; \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 4); \ +#define vst1_f32_x2(__p0, __p1) __extension__ ({ \ + float32x2x2_t __s1 = __p1; \ + float32x2x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_p16(__p0, __p1) __extension__ ({ \ - poly16x4_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 5); \ +#define vst1_s32_x2(__p0, __p1) __extension__ ({ \ + int32x2x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 2); \ }) #else -#define vst1_p16(__p0, __p1) __extension__ ({ \ - poly16x4_t __s1 = __p1; \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 5); \ +#define vst1_s32_x2(__p0, __p1) __extension__ ({ \ + int32x2x2_t __s1 = __p1; \ + int32x2x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_p8(__p0, __p1) __extension__ ({ \ - poly8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 36); \ +#define vst1_s64_x2(__p0, __p1) __extension__ ({ \ + int64x1x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 3); \ }) #else -#define vst1q_p8(__p0, __p1) __extension__ ({ \ - poly8x16_t __s1 = __p1; \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 36); \ +#define vst1_s64_x2(__p0, __p1) __extension__ ({ \ + int64x1x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 3); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_p16(__p0, __p1) __extension__ ({ \ - poly16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 37); \ +#define vst1_s16_x2(__p0, __p1) __extension__ ({ \ + int16x4x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 1); \ }) #else -#define vst1q_p16(__p0, __p1) __extension__ ({ \ - poly16x8_t __s1 = __p1; \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 37); \ +#define vst1_s16_x2(__p0, __p1) __extension__ ({ \ + int16x4x2_t __s1 = __p1; \ + int16x4x2_t __rev1; 
\ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 48); \ +#define vst1_p8_x3(__p0, __p1) __extension__ ({ \ + poly8x8x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 4); \ }) #else -#define vst1q_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s1 = __p1; \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 48); \ +#define vst1_p8_x3(__p0, __p1) __extension__ ({ \ + poly8x8x3_t __s1 = __p1; \ + poly8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 50); \ +#define vst1_p16_x3(__p0, __p1) __extension__ ({ \ + poly16x4x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 5); \ }) #else -#define vst1q_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 50); \ +#define vst1_p16_x3(__p0, __p1) __extension__ ({ \ + poly16x4x3_t __s1 = __p1; \ + poly16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 51); \ +#define vst1q_p8_x3(__p0, __p1) __extension__ ({ \ + poly8x16x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 36); \ }) #else -#define vst1q_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 51); \ +#define vst1q_p8_x3(__p0, __p1) __extension__ ({ \ + poly8x16x3_t __s1 = __p1; \ + poly8x16x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], 
(int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 49); \ +#define vst1q_p16_x3(__p0, __p1) __extension__ ({ \ + poly16x8x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 37); \ }) #else -#define vst1q_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 49); \ +#define vst1q_p16_x3(__p0, __p1) __extension__ ({ \ + poly16x8x3_t __s1 = __p1; \ + poly16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 32); \ +#define vst1q_u8_x3(__p0, __p1) __extension__ ({ \ + uint8x16x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 48); \ }) #else -#define vst1q_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s1 = __p1; \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 32); \ +#define vst1q_u8_x3(__p0, __p1) __extension__ ({ \ + uint8x16x3_t __s1 = __p1; \ + uint8x16x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_f32(__p0, __p1) __extension__ ({ \ - float32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 41); \ +#define vst1q_u32_x3(__p0, __p1) __extension__ ({ \ + uint32x4x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 50); \ }) #else -#define vst1q_f32(__p0, __p1) __extension__ ({ \ - float32x4_t __s1 = __p1; \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 41); \ +#define vst1q_u32_x3(__p0, __p1) __extension__ ({ \ + uint32x4x3_t __s1 = __p1; \ + uint32x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_f16(__p0, __p1) __extension__ ({ \ - 
float16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 40); \ +#define vst1q_u64_x3(__p0, __p1) __extension__ ({ \ + uint64x2x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 51); \ }) #else -#define vst1q_f16(__p0, __p1) __extension__ ({ \ - float16x8_t __s1 = __p1; \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 40); \ +#define vst1q_u64_x3(__p0, __p1) __extension__ ({ \ + uint64x2x3_t __s1 = __p1; \ + uint64x2x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 34); \ +#define vst1q_u16_x3(__p0, __p1) __extension__ ({ \ + uint16x8x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 49); \ }) #else -#define vst1q_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s1 = __p1; \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 34); \ +#define vst1q_u16_x3(__p0, __p1) __extension__ ({ \ + uint16x8x3_t __s1 = __p1; \ + uint16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 35); \ +#define vst1q_s8_x3(__p0, __p1) __extension__ ({ \ + int8x16x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 32); \ }) #else -#define vst1q_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s1 = __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 35); \ +#define vst1q_s8_x3(__p0, __p1) __extension__ ({ \ + int8x16x3_t __s1 = __p1; \ + int8x16x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 33); \ +#define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ + float32x4x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], 
__s1.val[1], __s1.val[2], 41); \ }) #else -#define vst1q_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s1 = __p1; \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 33); \ +#define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ + float32x4x3_t __s1 = __p1; \ + float32x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_u8(__p0, __p1) __extension__ ({ \ - uint8x8_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 16); \ +#define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ + int32x4x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 34); \ }) #else -#define vst1_u8(__p0, __p1) __extension__ ({ \ - uint8x8_t __s1 = __p1; \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 16); \ +#define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ + int32x4x3_t __s1 = __p1; \ + int32x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_u32(__p0, __p1) __extension__ ({ \ - uint32x2_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 18); \ +#define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ + int64x2x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 35); \ }) #else -#define vst1_u32(__p0, __p1) __extension__ ({ \ - uint32x2_t __s1 = __p1; \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 18); \ +#define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ + int64x2x3_t __s1 = __p1; \ + int64x2x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 19); \ +#define vst1q_s16_x3(__p0, __p1) __extension__ ({ \ + int16x8x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 33); \ }) #else -#define vst1_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 19); \ +#define vst1q_s16_x3(__p0, __p1) __extension__ ({ \ + int16x8x3_t __s1 = __p1; \ + int16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + 
__builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_u16(__p0, __p1) __extension__ ({ \ - uint16x4_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 17); \ +#define vst1_u8_x3(__p0, __p1) __extension__ ({ \ + uint8x8x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 16); \ }) #else -#define vst1_u16(__p0, __p1) __extension__ ({ \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 17); \ +#define vst1_u8_x3(__p0, __p1) __extension__ ({ \ + uint8x8x3_t __s1 = __p1; \ + uint8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_s8(__p0, __p1) __extension__ ({ \ - int8x8_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 0); \ +#define vst1_u32_x3(__p0, __p1) __extension__ ({ \ + uint32x2x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 18); \ }) #else -#define vst1_s8(__p0, __p1) __extension__ ({ \ - int8x8_t __s1 = __p1; \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 0); \ +#define vst1_u32_x3(__p0, __p1) __extension__ ({ \ + uint32x2x3_t __s1 = __p1; \ + uint32x2x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_f32(__p0, __p1) __extension__ ({ \ - float32x2_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 9); \ +#define vst1_u64_x3(__p0, __p1) __extension__ ({ \ + uint64x1x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ }) #else -#define vst1_f32(__p0, __p1) __extension__ ({ \ - float32x2_t __s1 = __p1; \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 9); \ +#define vst1_u64_x3(__p0, __p1) __extension__ ({ \ + uint64x1x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_f16(__p0, __p1) __extension__ ({ \ - float16x4_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 8); \ +#define vst1_u16_x3(__p0, __p1) __extension__ ({ \ + uint16x4x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 17); \ }) #else -#define vst1_f16(__p0, __p1) __extension__ ({ \ - float16x4_t __s1 = __p1; \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 8); \ +#define 
vst1_u16_x3(__p0, __p1) __extension__ ({ \ + uint16x4x3_t __s1 = __p1; \ + uint16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_s32(__p0, __p1) __extension__ ({ \ - int32x2_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 2); \ +#define vst1_s8_x3(__p0, __p1) __extension__ ({ \ + int8x8x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 0); \ }) #else -#define vst1_s32(__p0, __p1) __extension__ ({ \ - int32x2_t __s1 = __p1; \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 2); \ +#define vst1_s8_x3(__p0, __p1) __extension__ ({ \ + int8x8x3_t __s1 = __p1; \ + int8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 3); \ +#define vst1_f32_x3(__p0, __p1) __extension__ ({ \ + float32x2x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 9); \ }) #else -#define vst1_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 3); \ +#define vst1_f32_x3(__p0, __p1) __extension__ ({ \ + float32x2x3_t __s1 = __p1; \ + float32x2x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_s16(__p0, __p1) __extension__ ({ \ - int16x4_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 1); \ +#define vst1_s32_x3(__p0, __p1) __extension__ ({ \ + int32x2x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 2); \ }) #else -#define vst1_s16(__p0, __p1) __extension__ ({ \ - int16x4_t __s1 = __p1; \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 1); \ +#define vst1_s32_x3(__p0, __p1) __extension__ ({ \ + int32x2x3_t __s1 = __p1; \ + int32x2x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 4); 
\ +#define vst1_s64_x3(__p0, __p1) __extension__ ({ \ + int64x1x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 3); \ }) #else -#define vst1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x8_t __s1 = __p1; \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 4); \ +#define vst1_s64_x3(__p0, __p1) __extension__ ({ \ + int64x1x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 3); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 5); \ +#define vst1_s16_x3(__p0, __p1) __extension__ ({ \ + int16x4x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 1); \ }) #else -#define vst1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x4_t __s1 = __p1; \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 5); \ +#define vst1_s16_x3(__p0, __p1) __extension__ ({ \ + int16x4x3_t __s1 = __p1; \ + int16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 36); \ +#define vst1_p8_x4(__p0, __p1) __extension__ ({ \ + poly8x8x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 4); \ }) #else -#define vst1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ - poly8x16_t __s1 = __p1; \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 36); \ +#define vst1_p8_x4(__p0, __p1) __extension__ ({ \ + poly8x8x4_t __s1 = __p1; \ + poly8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 37); \ +#define vst1_p16_x4(__p0, __p1) __extension__ ({ \ + poly16x4x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 5); \ }) #else -#define vst1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ - poly16x8_t __s1 = __p1; \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, 
(int8x16_t)__rev1, __p2, 37); \ +#define vst1_p16_x4(__p0, __p1) __extension__ ({ \ + poly16x4x4_t __s1 = __p1; \ + poly16x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 48); \ +#define vst1q_p8_x4(__p0, __p1) __extension__ ({ \ + poly8x16x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 36); \ }) #else -#define vst1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x16_t __s1 = __p1; \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 48); \ +#define vst1q_p8_x4(__p0, __p1) __extension__ ({ \ + poly8x16x4_t __s1 = __p1; \ + poly8x16x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 50); \ +#define vst1q_p16_x4(__p0, __p1) __extension__ ({ \ + poly16x8x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 37); \ }) #else -#define vst1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 50); \ +#define vst1q_p16_x4(__p0, __p1) __extension__ ({ \ + poly16x8x4_t __s1 = __p1; \ + poly16x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 51); \ +#define 
vst1q_u8_x4(__p0, __p1) __extension__ ({ \ + uint8x16x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 48); \ }) #else -#define vst1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 51); \ +#define vst1q_u8_x4(__p0, __p1) __extension__ ({ \ + uint8x16x4_t __s1 = __p1; \ + uint8x16x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 49); \ +#define vst1q_u32_x4(__p0, __p1) __extension__ ({ \ + uint32x4x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 50); \ }) #else -#define vst1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 49); \ +#define vst1q_u32_x4(__p0, __p1) __extension__ ({ \ + uint32x4x4_t __s1 = __p1; \ + uint32x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 32); \ +#define vst1q_u64_x4(__p0, __p1) __extension__ ({ \ + uint64x2x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 51); \ }) #else -#define vst1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x16_t __s1 = __p1; \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 32); \ +#define vst1q_u64_x4(__p0, __p1) __extension__ ({ \ + uint64x2x4_t __s1 = __p1; \ + uint64x2x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], 
__s1.val[3], 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 41); \ +#define vst1q_u16_x4(__p0, __p1) __extension__ ({ \ + uint16x8x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 49); \ }) #else -#define vst1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x4_t __s1 = __p1; \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 41); \ +#define vst1q_u16_x4(__p0, __p1) __extension__ ({ \ + uint16x8x4_t __s1 = __p1; \ + uint16x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \ +#define vst1q_s8_x4(__p0, __p1) __extension__ ({ \ + int8x16x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 32); \ }) #else -#define vst1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8_t __s1 = __p1; \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 40); \ +#define vst1q_s8_x4(__p0, __p1) __extension__ ({ \ + int8x16x4_t __s1 = __p1; \ + int8x16x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 34); \ +#define vst1q_f32_x4(__p0, __p1) __extension__ ({ \ + float32x4x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 41); \ }) #else -#define vst1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s1 = __p1; \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 34); \ +#define vst1q_f32_x4(__p0, __p1) __extension__ ({ \ + 
float32x4x4_t __s1 = __p1; \ + float32x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 35); \ +#define vst1q_s32_x4(__p0, __p1) __extension__ ({ \ + int32x4x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 34); \ }) #else -#define vst1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x2_t __s1 = __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 35); \ +#define vst1q_s32_x4(__p0, __p1) __extension__ ({ \ + int32x4x4_t __s1 = __p1; \ + int32x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 33); \ +#define vst1q_s64_x4(__p0, __p1) __extension__ ({ \ + int64x2x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 35); \ }) #else -#define vst1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s1 = __p1; \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 33); \ +#define vst1q_s64_x4(__p0, __p1) __extension__ ({ \ + int64x2x4_t __s1 = __p1; \ + int64x2x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 16); \ +#define vst1q_s16_x4(__p0, __p1) __extension__ ({ \ + int16x8x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 33); \ }) #else -#define vst1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ - uint8x8_t __s1 = __p1; \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 16); \ +#define vst1q_s16_x4(__p0, __p1) __extension__ ({ \ + int16x8x4_t __s1 = __p1; \ + int16x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 
7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 18); \ +#define vst1_u8_x4(__p0, __p1) __extension__ ({ \ + uint8x8x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 16); \ }) #else -#define vst1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x2_t __s1 = __p1; \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 18); \ +#define vst1_u8_x4(__p0, __p1) __extension__ ({ \ + uint8x8x4_t __s1 = __p1; \ + uint8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ +#define vst1_u32_x4(__p0, __p1) __extension__ ({ \ + uint32x2x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 18); \ }) #else -#define vst1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ +#define vst1_u32_x4(__p0, __p1) __extension__ ({ \ + uint32x2x4_t __s1 = __p1; \ + uint32x2x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 17); \ +#define vst1_u64_x4(__p0, __p1) __extension__ ({ \ + uint64x1x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ }) #else -#define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s1 = __p1; \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 17); \ +#define vst1_u64_x4(__p0, __p1) __extension__ ({ \ + uint64x1x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, 
(int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 0); \ +#define vst1_u16_x4(__p0, __p1) __extension__ ({ \ + uint16x4x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 17); \ }) #else -#define vst1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ - int8x8_t __s1 = __p1; \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 0); \ +#define vst1_u16_x4(__p0, __p1) __extension__ ({ \ + uint16x4x4_t __s1 = __p1; \ + uint16x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x2_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 9); \ +#define vst1_s8_x4(__p0, __p1) __extension__ ({ \ + int8x8x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 0); \ }) #else -#define vst1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x2_t __s1 = __p1; \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 9); \ +#define vst1_s8_x4(__p0, __p1) __extension__ ({ \ + int8x8x4_t __s1 = __p1; \ + int8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \ +#define vst1_f32_x4(__p0, __p1) __extension__ ({ \ + float32x2x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 9); \ }) #else -#define vst1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4_t __s1 = __p1; \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \ +#define vst1_f32_x4(__p0, __p1) __extension__ ({ \ + float32x2x4_t __s1 = __p1; \ + float32x2x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __rev1.val[3] = 
__builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 2); \ +#define vst1_s32_x4(__p0, __p1) __extension__ ({ \ + int32x2x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 2); \ }) #else -#define vst1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s1 = __p1; \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 2); \ +#define vst1_s32_x4(__p0, __p1) __extension__ ({ \ + int32x2x4_t __s1 = __p1; \ + int32x2x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ +#define vst1_s64_x4(__p0, __p1) __extension__ ({ \ + int64x1x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 3); \ }) #else -#define vst1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ +#define vst1_s64_x4(__p0, __p1) __extension__ ({ \ + int64x1x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 3); \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 1); \ +#define vst1_s16_x4(__p0, __p1) __extension__ ({ \ + int16x4x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 1); \ }) #else -#define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s1 = __p1; \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 1); \ +#define vst1_s16_x4(__p0, __p1) __extension__ ({ \ + int16x4x4_t __s1 = __p1; \ + int16x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 1); \ }) #endif @@ -25715,21 +28051,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst2q_f16(__p0, __p1) __extension__ ({ \ - float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, __s1.val[0], __s1.val[1], 40); \ -}) -#else -#define vst2q_f16(__p0, __p1) __extension__ ({ \ - float16x8x2_t __s1 = __p1; \ - float16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 
4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, __rev1.val[0], __rev1.val[1], 40); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst2q_s32(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ @@ -25847,21 +28168,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst2_f16(__p0, __p1) __extension__ ({ \ - float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 8); \ -}) -#else -#define vst2_f16(__p0, __p1) __extension__ ({ \ - float16x4x2_t __s1 = __p1; \ - float16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, __rev1.val[0], __rev1.val[1], 8); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst2_s32(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ @@ -25994,21 +28300,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 40); \ -}) -#else -#define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x2_t __s1 = __p1; \ - float16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 40); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ @@ -26114,21 +28405,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 8); \ -}) -#else -#define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x2_t __s1 = __p1; \ - float16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 8); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ @@ -26303,22 +28579,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst3q_f16(__p0, __p1) __extension__ ({ \ - float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 40); \ -}) -#else -#define vst3q_f16(__p0, __p1) __extension__ ({ \ - float16x8x3_t __s1 = __p1; \ - float16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 40); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst3q_s32(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ @@ -26443,22 +28703,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, 
int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst3_f16(__p0, __p1) __extension__ ({ \ - float16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 8); \ -}) -#else -#define vst3_f16(__p0, __p1) __extension__ ({ \ - float16x4x3_t __s1 = __p1; \ - float16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 8); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst3_s32(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ @@ -26599,22 +28843,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 40); \ -}) -#else -#define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x3_t __s1 = __p1; \ - float16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 40); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ @@ -26727,22 +28955,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 8); \ -}) -#else -#define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x3_t __s1 = __p1; \ - float16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 8); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ @@ -26928,23 +29140,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst4q_f16(__p0, __p1) __extension__ ({ \ - float16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 40); \ -}) -#else -#define vst4q_f16(__p0, __p1) __extension__ ({ \ - float16x8x4_t __s1 = __p1; \ - float16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 40); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst4q_s32(__p0, __p1) 
__extension__ ({ \ int32x4x4_t __s1 = __p1; \ @@ -27076,23 +29271,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst4_f16(__p0, __p1) __extension__ ({ \ - float16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 8); \ -}) -#else -#define vst4_f16(__p0, __p1) __extension__ ({ \ - float16x4x4_t __s1 = __p1; \ - float16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 8); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst4_s32(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ @@ -27241,23 +29419,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 40); \ -}) -#else -#define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8x4_t __s1 = __p1; \ - float16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 40); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ @@ -27377,23 +29538,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 8); \ -}) -#else -#define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4x4_t __s1 = __p1; \ - float16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 8); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ @@ -33681,19689 +35825,19467 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { } #endif +#ifdef __LITTLE_ENDIAN__ +#define vld1q_f16(__p0) __extension__ ({ \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \ + __ret; \ +}) +#else +#define vld1q_f16(__p0) __extension__ ({ \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif -#if 
__ARM_ARCH >= 8 + #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vcvtaq_s32_f32(float32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtaq_s32_v((int8x16_t)__p0, 34); - return __ret; -} +#define vld1_f16(__p0) __extension__ ({ \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \ + __ret; \ +}) #else -__ai int32x4_t vcvtaq_s32_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtaq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld1_f16(__p0) __extension__ ({ \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vcvta_s32_f32(float32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvta_s32_v((int8x8_t)__p0, 2); - return __ret; -} +#define vld1q_dup_f16(__p0) __extension__ ({ \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \ + __ret; \ +}) #else -__ai int32x2_t vcvta_s32_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvta_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld1q_dup_f16(__p0) __extension__ ({ \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtaq_u32_v((int8x16_t)__p0, 50); - return __ret; -} +#define vld1_dup_f16(__p0) __extension__ ({ \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \ + __ret; \ +}) #else -__ai uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtaq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld1_dup_f16(__p0) __extension__ ({ \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vcvta_u32_f32(float32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvta_u32_v((int8x8_t)__p0, 18); - return __ret; -} +#define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s1 = __p1; \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \ + __ret; \ +}) #else -__ai uint32x2_t vcvta_u32_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvta_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s1 = __p1; \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, 
(int8x16_t)__rev1, __p2, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vcvtmq_s32_f32(float32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtmq_s32_v((int8x16_t)__p0, 34); - return __ret; -} +#define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s1 = __p1; \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \ + __ret; \ +}) #else -__ai int32x4_t vcvtmq_s32_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtmq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s1 = __p1; \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vcvtm_s32_f32(float32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtm_s32_v((int8x8_t)__p0, 2); - return __ret; -} +#define vld1q_f16_x2(__p0) __extension__ ({ \ + float16x8x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai int32x2_t vcvtm_s32_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtm_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld1q_f16_x2(__p0) __extension__ ({ \ + float16x8x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtmq_u32_v((int8x16_t)__p0, 50); - return __ret; -} +#define vld1_f16_x2(__p0) __extension__ ({ \ + float16x4x2_t __ret; \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 8); \ + __ret; \ +}) #else -__ai uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtmq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld1_f16_x2(__p0) __extension__ ({ \ + float16x4x2_t __ret; \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vcvtm_u32_f32(float32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtm_u32_v((int8x8_t)__p0, 18); - return __ret; -} +#define vld1q_f16_x3(__p0) __extension__ ({ \ + float16x8x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai uint32x2_t vcvtm_u32_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); 
- uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtm_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld1q_f16_x3(__p0) __extension__ ({ \ + float16x8x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vcvtnq_s32_f32(float32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtnq_s32_v((int8x16_t)__p0, 34); - return __ret; -} +#define vld1_f16_x3(__p0) __extension__ ({ \ + float16x4x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 8); \ + __ret; \ +}) #else -__ai int32x4_t vcvtnq_s32_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtnq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld1_f16_x3(__p0) __extension__ ({ \ + float16x4x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vcvtn_s32_f32(float32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtn_s32_v((int8x8_t)__p0, 2); - return __ret; -} +#define vld1q_f16_x4(__p0) __extension__ ({ \ + float16x8x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai int32x2_t vcvtn_s32_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtn_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld1q_f16_x4(__p0) __extension__ ({ \ + float16x8x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtnq_u32_v((int8x16_t)__p0, 50); - return __ret; -} +#define vld1_f16_x4(__p0) __extension__ ({ \ + float16x4x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 8); \ + __ret; \ +}) #else -__ai uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtnq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld1_f16_x4(__p0) __extension__ ({ \ + float16x4x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vcvtn_u32_f32(float32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtn_u32_v((int8x8_t)__p0, 18); - return __ret; -} +#define vld2q_f16(__p0) __extension__ ({ \ + float16x8x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai uint32x2_t vcvtn_u32_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtn_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld2q_f16(__p0) __extension__ ({ \ + float16x8x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vcvtpq_s32_f32(float32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtpq_s32_v((int8x16_t)__p0, 34); - return __ret; -} +#define vld2_f16(__p0) __extension__ ({ \ + float16x4x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 8); \ + __ret; \ +}) #else -__ai int32x4_t vcvtpq_s32_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtpq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld2_f16(__p0) __extension__ ({ \ + float16x4x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vcvtp_s32_f32(float32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtp_s32_v((int8x8_t)__p0, 2); - return __ret; -} +#define vld2q_dup_f16(__p0) __extension__ ({ \ + float16x8x2_t __ret; \ + __builtin_neon_vld2q_dup_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai int32x2_t vcvtp_s32_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtp_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld2q_dup_f16(__p0) __extension__ ({ \ + float16x8x2_t __ret; \ + __builtin_neon_vld2q_dup_v(&__ret, __p0, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld2_dup_f16(__p0) __extension__ ({ \ + float16x4x2_t __ret; \ + __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \ + __ret; \ +}) +#else +#define vld2_dup_f16(__p0) __extension__ ({ \ + float16x4x2_t __ret; \ + __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + 
__ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtpq_u32_v((int8x16_t)__p0, 50); - return __ret; -} +#define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x2_t __s1 = __p1; \ + float16x8x2_t __ret; \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 40); \ + __ret; \ +}) #else -__ai uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtpq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x2_t __s1 = __p1; \ + float16x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8x2_t __ret; \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vcvtp_u32_f32(float32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtp_u32_v((int8x8_t)__p0, 18); - return __ret; -} +#define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4x2_t __s1 = __p1; \ + float16x4x2_t __ret; \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 8); \ + __ret; \ +}) #else -__ai uint32x2_t vcvtp_u32_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtp_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4x2_t __s1 = __p1; \ + float16x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + float16x4x2_t __ret; \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret; \ +}) #endif -#endif -#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrndq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 41); - return __ret; -} +#define vld3q_f16(__p0) __extension__ ({ \ + float16x8x3_t __ret; \ + __builtin_neon_vld3q_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai float32x4_t vrndq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld3q_f16(__p0) __extension__ ({ \ + float16x8x3_t __ret; \ + __builtin_neon_vld3q_v(&__ret, __p0, 40); \ + \ + 
__ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrnd_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 9); - return __ret; -} +#define vld3_f16(__p0) __extension__ ({ \ + float16x4x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 8); \ + __ret; \ +}) #else -__ai float32x2_t vrnd_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnd_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld3_f16(__p0) __extension__ ({ \ + float16x4x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrndaq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 41); - return __ret; -} +#define vld3q_dup_f16(__p0) __extension__ ({ \ + float16x8x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai float32x4_t vrndaq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld3q_dup_f16(__p0) __extension__ ({ \ + float16x8x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrnda_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 9); - return __ret; -} +#define vld3_dup_f16(__p0) __extension__ ({ \ + float16x4x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \ + __ret; \ +}) #else -__ai float32x2_t vrnda_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnda_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld3_dup_f16(__p0) __extension__ ({ \ + float16x4x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrndmq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 41); - return __ret; -} 
+#define vld3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x3_t __s1 = __p1; \ + float16x8x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 40); \ + __ret; \ +}) #else -__ai float32x4_t vrndmq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x3_t __s1 = __p1; \ + float16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8x3_t __ret; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrndm_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 9); - return __ret; -} +#define vld3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4x3_t __s1 = __p1; \ + float16x4x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 8); \ + __ret; \ +}) #else -__ai float32x2_t vrndm_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndm_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4x3_t __s1 = __p1; \ + float16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + float16x4x3_t __ret; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrndnq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 41); - return __ret; -} +#define vld4q_f16(__p0) __extension__ ({ \ + float16x8x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai float32x4_t vrndnq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld4q_f16(__p0) __extension__ ({ \ + float16x8x4_t __ret; \ + 
__builtin_neon_vld4q_v(&__ret, __p0, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrndn_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 9); - return __ret; -} +#define vld4_f16(__p0) __extension__ ({ \ + float16x4x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 8); \ + __ret; \ +}) #else -__ai float32x2_t vrndn_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndn_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld4_f16(__p0) __extension__ ({ \ + float16x4x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrndpq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 41); - return __ret; -} +#define vld4q_dup_f16(__p0) __extension__ ({ \ + float16x8x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 40); \ + __ret; \ +}) #else -__ai float32x4_t vrndpq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld4q_dup_f16(__p0) __extension__ ({ \ + float16x8x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrndp_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 9); - return __ret; -} +#define vld4_dup_f16(__p0) __extension__ ({ \ + float16x4x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 8); \ + __ret; \ +}) #else -__ai float32x2_t vrndp_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndp_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld4_dup_f16(__p0) __extension__ ({ \ + float16x4x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrndxq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 41); - return __ret; -} +#define vld4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x4_t __s1 = __p1; \ + float16x8x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 40); \ + __ret; \ +}) #else -__ai float32x4_t vrndxq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vld4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x4_t __s1 = __p1; \ + float16x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8x4_t __ret; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 40); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrndx_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 9); - return __ret; -} +#define vld4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4x4_t __s1 = __p1; \ + float16x4x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 8); \ + __ret; \ +}) #else -__ai float32x2_t vrndx_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndx_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vld4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4x4_t __s1 = __p1; \ + float16x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + float16x4x4_t __ret; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 8); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[2] = 
__builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret; \ +}) #endif -#endif -#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); - return __ret; -} +#define vst1q_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s1 = __p1; \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 40); \ +}) #else -__ai float32x4_t vmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vst1q_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s1 = __p1; \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 40); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); - return __ret; -} +#define vst1_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s1 = __p1; \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 8); \ +}) #else -__ai float32x2_t vmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst1_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s1 = __p1; \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 8); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vminnmq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); - return __ret; -} +#define vst1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s1 = __p1; \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \ +}) #else -__ai float32x4_t vminnmq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vst1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s1 = __p1; \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 40); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vminnm_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); - return __ret; -} +#define 
vst1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \ +}) #else -__ai float32x2_t vminnm_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s1 = __p1; \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \ +}) #endif -#endif -#if __ARM_ARCH >= 8 && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vcvtaq_s64_f64(float64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtaq_s64_v((int8x16_t)__p0, 35); - return __ret; -} -#else -__ai int64x2_t vcvtaq_s64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtaq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ + float16x8x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 40); \ +}) +#else +#define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ + float16x8x2_t __s1 = __p1; \ + float16x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 40); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vcvta_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvta_s64_v((int8x8_t)__p0, 3); - return __ret; -} +#define vst1_f16_x2(__p0, __p1) __extension__ ({ \ + float16x4x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 8); \ +}) #else -__ai int64x1_t vcvta_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvta_s64_v((int8x8_t)__p0, 3); - return __ret; -} +#define vst1_f16_x2(__p0, __p1) __extension__ ({ \ + float16x4x2_t __s1 = __p1; \ + float16x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 8); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtaq_u64_v((int8x16_t)__p0, 51); - return __ret; -} +#define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ + float16x8x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 40); \ +}) #else -__ai uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtaq_u64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ + float16x8x3_t __s1 = __p1; \ + float16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], 
__s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 40); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcvta_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvta_u64_v((int8x8_t)__p0, 19); - return __ret; -} +#define vst1_f16_x3(__p0, __p1) __extension__ ({ \ + float16x4x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 8); \ +}) #else -__ai uint64x1_t vcvta_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvta_u64_v((int8x8_t)__p0, 19); - return __ret; -} +#define vst1_f16_x3(__p0, __p1) __extension__ ({ \ + float16x4x3_t __s1 = __p1; \ + float16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 8); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtmq_s64_v((int8x16_t)__p0, 35); - return __ret; -} +#define vst1q_f16_x4(__p0, __p1) __extension__ ({ \ + float16x8x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 40); \ +}) #else -__ai int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtmq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst1q_f16_x4(__p0, __p1) __extension__ ({ \ + float16x8x4_t __s1 = __p1; \ + float16x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 40); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vcvtm_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtm_s64_v((int8x8_t)__p0, 3); - return __ret; -} +#define vst1_f16_x4(__p0, __p1) __extension__ ({ \ + float16x4x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 8); \ +}) #else -__ai int64x1_t vcvtm_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtm_s64_v((int8x8_t)__p0, 3); - return __ret; -} +#define vst1_f16_x4(__p0, __p1) __extension__ ({ \ + float16x4x4_t __s1 = __p1; \ + float16x4x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ + 
__builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 8); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtmq_u64_v((int8x16_t)__p0, 51); - return __ret; -} +#define vst2q_f16(__p0, __p1) __extension__ ({ \ + float16x8x2_t __s1 = __p1; \ + __builtin_neon_vst2q_v(__p0, __s1.val[0], __s1.val[1], 40); \ +}) #else -__ai uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtmq_u64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst2q_f16(__p0, __p1) __extension__ ({ \ + float16x8x2_t __s1 = __p1; \ + float16x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst2q_v(__p0, __rev1.val[0], __rev1.val[1], 40); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcvtm_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtm_u64_v((int8x8_t)__p0, 19); - return __ret; -} +#define vst2_f16(__p0, __p1) __extension__ ({ \ + float16x4x2_t __s1 = __p1; \ + __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 8); \ +}) #else -__ai uint64x1_t vcvtm_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtm_u64_v((int8x8_t)__p0, 19); - return __ret; -} +#define vst2_f16(__p0, __p1) __extension__ ({ \ + float16x4x2_t __s1 = __p1; \ + float16x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst2_v(__p0, __rev1.val[0], __rev1.val[1], 8); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtnq_s64_v((int8x16_t)__p0, 35); - return __ret; -} +#define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x2_t __s1 = __p1; \ + __builtin_neon_vst2q_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 40); \ +}) #else -__ai int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtnq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x2_t __s1 = __p1; \ + float16x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst2q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 40); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vcvtn_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtn_s64_v((int8x8_t)__p0, 3); - return __ret; -} +#define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4x2_t __s1 = __p1; \ + __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 8); \ +}) #else -__ai int64x1_t vcvtn_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtn_s64_v((int8x8_t)__p0, 3); - return __ret; -} +#define 
vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4x2_t __s1 = __p1; \ + float16x4x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __builtin_neon_vst2_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 8); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtnq_u64_v((int8x16_t)__p0, 51); - return __ret; -} +#define vst3q_f16(__p0, __p1) __extension__ ({ \ + float16x8x3_t __s1 = __p1; \ + __builtin_neon_vst3q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 40); \ +}) #else -__ai uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtnq_u64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst3q_f16(__p0, __p1) __extension__ ({ \ + float16x8x3_t __s1 = __p1; \ + float16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __builtin_neon_vst3q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 40); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcvtn_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtn_u64_v((int8x8_t)__p0, 19); - return __ret; -} +#define vst3_f16(__p0, __p1) __extension__ ({ \ + float16x4x3_t __s1 = __p1; \ + __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 8); \ +}) #else -__ai uint64x1_t vcvtn_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtn_u64_v((int8x8_t)__p0, 19); - return __ret; -} +#define vst3_f16(__p0, __p1) __extension__ ({ \ + float16x4x3_t __s1 = __p1; \ + float16x4x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ + __builtin_neon_vst3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 8); \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtpq_s64_v((int8x16_t)__p0, 35); - return __ret; -} +#define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x3_t __s1 = __p1; \ + __builtin_neon_vst3q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 40); \ +}) #else -__ai int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtpq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8x3_t __s1 = __p1; \ + float16x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + 
+})
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai int64x1_t vcvtp_s64_f64(float64x1_t __p0) {
- int64x1_t __ret;
- __ret = (int64x1_t) __builtin_neon_vcvtp_s64_v((int8x8_t)__p0, 3);
- return __ret;
-}
+#define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+ float16x4x3_t __s1 = __p1; \
+ __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 8); \
+})
 #else
-__ai int64x1_t vcvtp_s64_f64(float64x1_t __p0) {
- int64x1_t __ret;
- __ret = (int64x1_t) __builtin_neon_vcvtp_s64_v((int8x8_t)__p0, 3);
- return __ret;
-}
+#define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+ float16x4x3_t __s1 = __p1; \
+ float16x4x3_t __rev1; \
+ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
+ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
+ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
+ __builtin_neon_vst3_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 8); \
+})
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) {
- uint64x2_t __ret;
- __ret = (uint64x2_t) __builtin_neon_vcvtpq_u64_v((int8x16_t)__p0, 51);
- return __ret;
-}
+#define vst4q_f16(__p0, __p1) __extension__ ({ \
+ float16x8x4_t __s1 = __p1; \
+ __builtin_neon_vst4q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 40); \
+})
 #else
-__ai uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) {
- float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
- uint64x2_t __ret;
- __ret = (uint64x2_t) __builtin_neon_vcvtpq_u64_v((int8x16_t)__rev0, 51);
- __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
- return __ret;
-}
+#define vst4q_f16(__p0, __p1) __extension__ ({ \
+ float16x8x4_t __s1 = __p1; \
+ float16x8x4_t __rev1; \
+ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
+ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
+ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
+ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
+ __builtin_neon_vst4q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 40); \
+})
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vcvtp_u64_f64(float64x1_t __p0) {
- uint64x1_t __ret;
- __ret = (uint64x1_t) __builtin_neon_vcvtp_u64_v((int8x8_t)__p0, 19);
- return __ret;
-}
+#define vst4_f16(__p0, __p1) __extension__ ({ \
+ float16x4x4_t __s1 = __p1; \
+ __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 8); \
+})
 #else
-__ai uint64x1_t vcvtp_u64_f64(float64x1_t __p0) {
- uint64x1_t __ret;
- __ret = (uint64x1_t) __builtin_neon_vcvtp_u64_v((int8x8_t)__p0, 19);
- return __ret;
-}
+#define vst4_f16(__p0, __p1) __extension__ ({ \
+ float16x4x4_t __s1 = __p1; \
+ float16x4x4_t __rev1; \
+ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
+ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
+ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
+ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
+ __builtin_neon_vst4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 8); \
+})
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_p64(poly64x1_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
- return __ret;
-}
+#define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+ float16x8x4_t __s1 = __p1; \
+ __builtin_neon_vst4q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 40); \
+})
 #else
-__ai poly8x8_t vreinterpret_p8_p64(poly64x1_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
- return __ret;
-}
+#define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+ float16x8x4_t __s1 = __p1; \
+ float16x8x4_t __rev1; \
+ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
+ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
+ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
+ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
+ __builtin_neon_vst4q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 40); \
+})
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+#define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+ float16x4x4_t __s1 = __p1; \
+ __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 8); \
+})
 #else
-__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+#define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+ float16x4x4_t __s1 = __p1; \
+ float16x4x4_t __rev1; \
+ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
+ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
+ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
+ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
+ __builtin_neon_vst4_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 8); \
+})
 #endif
+#endif
+#if __ARM_ARCH >= 8
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x4_t vcvtaq_s32_f32(float32x4_t __p0) {
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vcvtaq_s32_v((int8x16_t)__p0, 34);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x4_t vcvtaq_s32_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vcvtaq_s32_v((int8x16_t)__rev0, 34);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x2_t vcvta_s32_f32(float32x2_t __p0) {
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcvta_s32_v((int8x8_t)__p0, 2);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x2_t vcvta_s32_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcvta_s32_v((int8x8_t)__rev0, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vcvtaq_u32_v((int8x16_t)__p0, 50);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vcvtaq_u32_v((int8x16_t)__rev0, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x2_t vcvta_u32_f32(float32x2_t __p0) {
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vcvta_u32_v((int8x8_t)__p0, 18);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x2_t vcvta_u32_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vcvta_u32_v((int8x8_t)__rev0, 18);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x4_t vcvtmq_s32_f32(float32x4_t __p0) {
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vcvtmq_s32_v((int8x16_t)__p0, 34);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x4_t vcvtmq_s32_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vcvtmq_s32_v((int8x16_t)__rev0, 34);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_f64(float64x1_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x2_t vcvtm_s32_f32(float32x2_t __p0) {
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcvtm_s32_v((int8x8_t)__p0, 2);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_f64(float64x1_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x2_t vcvtm_s32_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcvtm_s32_v((int8x8_t)__rev0, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vcvtmq_u32_v((int8x16_t)__p0, 50);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vcvtmq_u32_v((int8x16_t)__rev0, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x2_t vcvtm_u32_f32(float32x2_t __p0) {
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vcvtm_u32_v((int8x8_t)__p0, 18);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x2_t vcvtm_u32_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vcvtm_u32_v((int8x8_t)__rev0, 18);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x4_t vcvtnq_s32_f32(float32x4_t __p0) {
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vcvtnq_s32_v((int8x16_t)__p0, 34);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x4_t vcvtnq_s32_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vcvtnq_s32_v((int8x16_t)__rev0, 34);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x2_t vcvtn_s32_f32(float32x2_t __p0) {
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcvtn_s32_v((int8x8_t)__p0, 2);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai int32x2_t vcvtn_s32_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcvtn_s32_v((int8x8_t)__rev0, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vcvtnq_u32_v((int8x16_t)__p0, 50);
 return __ret;
 }
 #else
-__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) {
- poly8x8_t __ret;
- __ret = (poly8x8_t)(__p0);
+__ai uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vcvtnq_u32_v((int8x16_t)__rev0, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_p8(poly8x8_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x2_t vcvtn_u32_f32(float32x2_t __p0) {
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vcvtn_u32_v((int8x8_t)__p0, 18);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_p8(poly8x8_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x2_t vcvtn_u32_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vcvtn_u32_v((int8x8_t)__rev0, 18);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_p16(poly16x4_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai int32x4_t vcvtpq_s32_f32(float32x4_t __p0) {
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vcvtpq_s32_v((int8x16_t)__p0, 34);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_p16(poly16x4_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai int32x4_t vcvtpq_s32_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vcvtpq_s32_v((int8x16_t)__rev0, 34);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_u8(uint8x8_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai int32x2_t vcvtp_s32_f32(float32x2_t __p0) {
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcvtp_s32_v((int8x8_t)__p0, 2);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_u8(uint8x8_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai int32x2_t vcvtp_s32_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcvtp_s32_v((int8x8_t)__rev0, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_u32(uint32x2_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vcvtpq_u32_v((int8x16_t)__p0, 50);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_u32(uint32x2_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vcvtpq_u32_v((int8x16_t)__rev0, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_u64(uint64x1_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x2_t vcvtp_u32_f32(float32x2_t __p0) {
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vcvtp_u32_v((int8x8_t)__p0, 18);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_u64(uint64x1_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x2_t vcvtp_u32_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint32x2_t __ret;
+ __ret = (uint32x2_t) __builtin_neon_vcvtp_u32_v((int8x8_t)__rev0, 18);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_u16(uint16x4_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vaesdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_u16(uint16x4_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vaesdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_s8(int8x8_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint8x16_t vaeseq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vaeseq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_s8(int8x8_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint8x16_t vaeseq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vaeseq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_f64(float64x1_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint8x16_t vaesimcq_u8(uint8x16_t __p0) {
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vaesimcq_v((int8x16_t)__p0, 48);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_f64(float64x1_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint8x16_t vaesimcq_u8(uint8x16_t __p0) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vaesimcq_v((int8x16_t)__rev0, 48);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_f32(float32x2_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint8x16_t vaesmcq_u8(uint8x16_t __p0) {
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vaesmcq_v((int8x16_t)__p0, 48);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_f32(float32x2_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint8x16_t vaesmcq_u8(uint8x16_t __p0) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vaesmcq_v((int8x16_t)__rev0, 48);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_f16(float16x4_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_f16(float16x4_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_s32(int32x2_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32_t vsha1h_u32(uint32_t __p0) {
+ uint32_t __ret;
+ __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_s32(int32x2_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32_t vsha1h_u32(uint32_t __p0) {
+ uint32_t __ret;
+ __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_s64(int64x1_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_s64(int64x1_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x1_t vreinterpret_p64_s16(int16x4_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2);
 return __ret;
 }
 #else
-__ai poly64x1_t vreinterpret_p64_s16(int16x4_t __p0) {
- poly64x1_t __ret;
- __ret = (poly64x1_t)(__p0);
+__ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1su0q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_p64(poly64x1_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1su1q_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_p64(poly64x1_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256hq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256hq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256h2q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256h2q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256su0q_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256su1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x4_t vrndq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 41);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x4_t vrndq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_f64(float64x1_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x2_t vrnd_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 9);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_f64(float64x1_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x2_t vrnd_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x4_t vrndaq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 41);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x4_t vrndaq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x2_t vrnda_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 9);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x2_t vrnda_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnda_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x4_t vrndiq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 41);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x4_t vrndiq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x2_t vrndi_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 9);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x2_t vrndi_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndi_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x4_t vrndmq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 41);
 return __ret;
 }
 #else
-__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) {
- poly16x4_t __ret;
- __ret = (poly16x4_t)(__p0);
+__ai float32x4_t vrndmq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_p128(poly128_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x2_t vrndm_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 9);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_p128(poly128_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x2_t vrndm_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndm_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_p64(poly64x2_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x4_t vrndnq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 41);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_p64(poly64x2_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x4_t vrndnq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x2_t vrndn_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 9);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x2_t vrndn_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndn_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32_t vrndns_f32(float32_t __p0) {
+ float32_t __ret;
+ __ret = (float32_t) __builtin_neon_vrndns_f32(__p0);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32_t vrndns_f32(float32_t __p0) {
+ float32_t __ret;
+ __ret = (float32_t) __builtin_neon_vrndns_f32(__p0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x4_t vrndpq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 41);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x4_t vrndpq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x2_t vrndp_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 9);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x2_t vrndp_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndp_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x4_t vrndxq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 41);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x4_t vrndxq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x2_t vrndx_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 9);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float32x2_t vrndx_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrndx_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_f64(float64x2_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x8_t vrndq_f16(float16x8_t __p0) {
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 40);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_f64(float64x2_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x8_t vrndq_f16(float16x8_t __p0) {
+ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 40);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x4_t vrnd_f16(float16x4_t __p0) {
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 8);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x4_t vrnd_f16(float16x4_t __p0) {
+ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrnd_v((int8x8_t)__rev0, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x8_t vrndaq_f16(float16x8_t __p0) {
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 40);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x8_t vrndaq_f16(float16x8_t __p0) {
+ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 40);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x4_t vrnda_f16(float16x4_t __p0) {
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 8);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x4_t vrnda_f16(float16x4_t __p0) {
+ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrnda_v((int8x8_t)__rev0, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x8_t vrndmq_f16(float16x8_t __p0) {
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 40);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x8_t vrndmq_f16(float16x8_t __p0) {
+ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 40);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x4_t vrndm_f16(float16x4_t __p0) {
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 8);
 return __ret;
 }
 #else
-__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) {
- poly8x16_t __ret;
- __ret = (poly8x16_t)(__p0);
+__ai float16x4_t vrndm_f16(float16x4_t __p0) {
+ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrndm_v((int8x8_t)__rev0, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_p8(poly8x16_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vrndnq_f16(float16x8_t __p0) {
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 40);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_p8(poly8x16_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vrndnq_f16(float16x8_t __p0) {
+ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 40);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_p64(poly64x2_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vrndn_f16(float16x4_t __p0) {
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 8);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_p64(poly64x2_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vrndn_f16(float16x4_t __p0) {
+ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrndn_v((int8x8_t)__rev0, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_p16(poly16x8_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vrndpq_f16(float16x8_t __p0) {
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 40);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_p16(poly16x8_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vrndpq_f16(float16x8_t __p0) {
+ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 40);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_u8(uint8x16_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vrndp_f16(float16x4_t __p0) {
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 8);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_u8(uint8x16_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vrndp_f16(float16x4_t __p0) {
+ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrndp_v((int8x8_t)__rev0, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_u32(uint32x4_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vrndxq_f16(float16x8_t __p0) {
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 40);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_u32(uint32x4_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vrndxq_f16(float16x8_t __p0) {
+ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 40);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_u64(uint64x2_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vrndx_f16(float16x4_t __p0) {
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 8);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_u64(uint64x2_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vrndx_f16(float16x4_t __p0) {
+ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vrndx_v((int8x8_t)__rev0, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_u16(uint16x8_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float32x4_t vmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_u16(uint16x8_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float32x4_t vmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_s8(int8x16_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float32x2_t vmaxnm_f32(float32x2_t __p0, float32x2_t __p1) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_s8(int8x16_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float32x2_t vmaxnm_f32(float32x2_t __p0, float32x2_t __p1) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_f64(float64x2_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float32x4_t vminnmq_f32(float32x4_t __p0, float32x4_t __p1) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_f64(float64x2_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float32x4_t vminnmq_f32(float32x4_t __p0, float32x4_t __p1) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_f32(float32x4_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float32x2_t vminnm_f32(float32x2_t __p0, float32x2_t __p1) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_f32(float32x4_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float32x2_t vminnm_f32(float32x2_t __p0, float32x2_t __p1) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_f16(float16x8_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) {
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_f16(float16x8_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) {
+ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_s32(int32x4_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vmaxnm_f16(float16x4_t __p0, float16x4_t __p1) {
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_s32(int32x4_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vmaxnm_f16(float16x4_t __p0, float16x4_t __p1) {
+ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_s64(int64x2_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vminnmq_f16(float16x8_t __p0, float16x8_t __p1) {
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_s64(int64x2_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x8_t vminnmq_f16(float16x8_t __p0, float16x8_t __p1) {
+ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ float16x8_t __ret;
+ __ret = (float16x8_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly128_t vreinterpretq_p128_s16(int16x8_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) {
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
 return __ret;
 }
 #else
-__ai poly128_t vreinterpretq_p128_s16(int16x8_t __p0) {
- poly128_t __ret;
- __ret = (poly128_t)(__p0);
+__ai float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) {
+ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ float16x4_t __ret;
+ __ret = (float16x4_t) __builtin_neon_vminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
 return __ret;
 }
 #endif
+#endif
+#if __ARM_ARCH >= 8 && defined(__aarch64__)
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x2_t vreinterpretq_p64_p8(poly8x16_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai int64x2_t vcvtaq_s64_f64(float64x2_t __p0) {
+ int64x2_t __ret;
+ __ret = (int64x2_t) __builtin_neon_vcvtaq_s64_v((int8x16_t)__p0, 35);
 return __ret;
 }
 #else
-__ai poly64x2_t vreinterpretq_p64_p8(poly8x16_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai int64x2_t vcvtaq_s64_f64(float64x2_t __p0) {
+ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int64x2_t __ret;
+ __ret = (int64x2_t) __builtin_neon_vcvtaq_s64_v((int8x16_t)__rev0, 35);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x2_t vreinterpretq_p64_p128(poly128_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai int64x1_t vcvta_s64_f64(float64x1_t __p0) {
+ int64x1_t __ret;
+ __ret = (int64x1_t) __builtin_neon_vcvta_s64_v((int8x8_t)__p0, 3);
 return __ret;
 }
 #else
-__ai poly64x2_t vreinterpretq_p64_p128(poly128_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai int64x1_t vcvta_s64_f64(float64x1_t __p0) {
+ int64x1_t __ret;
+ __ret = (int64x1_t) __builtin_neon_vcvta_s64_v((int8x8_t)__p0, 3);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x2_t vreinterpretq_p64_p16(poly16x8_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vcvtaq_u64_v((int8x16_t)__p0, 51);
 return __ret;
 }
 #else
-__ai poly64x2_t vreinterpretq_p64_p16(poly16x8_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) {
+ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vcvtaq_u64_v((int8x16_t)__rev0, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x2_t vreinterpretq_p64_u8(uint8x16_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai uint64x1_t vcvta_u64_f64(float64x1_t __p0) {
+ uint64x1_t __ret;
+ __ret = (uint64x1_t) __builtin_neon_vcvta_u64_v((int8x8_t)__p0, 19);
 return __ret;
 }
 #else
-__ai poly64x2_t vreinterpretq_p64_u8(uint8x16_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai uint64x1_t vcvta_u64_f64(float64x1_t __p0) {
+ uint64x1_t __ret;
+ __ret = (uint64x1_t) __builtin_neon_vcvta_u64_v((int8x8_t)__p0, 19);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x2_t vreinterpretq_p64_u32(uint32x4_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai int64x2_t vcvtmq_s64_f64(float64x2_t __p0) {
+ int64x2_t __ret;
+ __ret = (int64x2_t) __builtin_neon_vcvtmq_s64_v((int8x16_t)__p0, 35);
 return __ret;
 }
 #else
-__ai poly64x2_t vreinterpretq_p64_u32(uint32x4_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai int64x2_t vcvtmq_s64_f64(float64x2_t __p0) {
+ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int64x2_t __ret;
+ __ret = (int64x2_t) __builtin_neon_vcvtmq_s64_v((int8x16_t)__rev0, 35);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x2_t vreinterpretq_p64_u64(uint64x2_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai int64x1_t vcvtm_s64_f64(float64x1_t __p0) {
+ int64x1_t __ret;
+ __ret = (int64x1_t) __builtin_neon_vcvtm_s64_v((int8x8_t)__p0, 3);
 return __ret;
 }
 #else
-__ai poly64x2_t vreinterpretq_p64_u64(uint64x2_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai int64x1_t vcvtm_s64_f64(float64x1_t __p0) {
+ int64x1_t __ret;
+ __ret = (int64x1_t) __builtin_neon_vcvtm_s64_v((int8x8_t)__p0, 3);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x2_t vreinterpretq_p64_u16(uint16x8_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vcvtmq_u64_v((int8x16_t)__p0, 51);
 return __ret;
 }
 #else
-__ai poly64x2_t vreinterpretq_p64_u16(uint16x8_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) {
+ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vcvtmq_u64_v((int8x16_t)__rev0, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
 return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai poly64x2_t vreinterpretq_p64_s8(int8x16_t __p0) {
- poly64x2_t __ret;
- __ret = (poly64x2_t)(__p0);
+__ai uint64x1_t vcvtm_u64_f64(float64x1_t __p0) {
vcvtm_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcvtm_u64_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vreinterpretq_p64_f64(float64x2_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vcvtnq_s64_v((int8x16_t)__p0, 35); return __ret; } #else -__ai poly64x2_t vreinterpretq_p64_f64(float64x2_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vcvtnq_s64_v((int8x16_t)__rev0, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vreinterpretq_p64_f32(float32x4_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai int64x1_t vcvtn_s64_f64(float64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vcvtn_s64_v((int8x8_t)__p0, 3); return __ret; } #else -__ai poly64x2_t vreinterpretq_p64_f32(float32x4_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai int64x1_t vcvtn_s64_f64(float64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vcvtn_s64_v((int8x8_t)__p0, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vreinterpretq_p64_f16(float16x8_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcvtnq_u64_v((int8x16_t)__p0, 51); return __ret; } #else -__ai poly64x2_t vreinterpretq_p64_f16(float16x8_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcvtnq_u64_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vreinterpretq_p64_s32(int32x4_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai uint64x1_t vcvtn_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcvtn_u64_v((int8x8_t)__p0, 19); return __ret; } #else -__ai poly64x2_t vreinterpretq_p64_s32(int32x4_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai uint64x1_t vcvtn_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcvtn_u64_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vreinterpretq_p64_s64(int64x2_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vcvtpq_s64_v((int8x16_t)__p0, 35); return __ret; } #else -__ai poly64x2_t vreinterpretq_p64_s64(int64x2_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vcvtpq_s64_v((int8x16_t)__rev0, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai int64x1_t 
vcvtp_s64_f64(float64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vcvtp_s64_v((int8x8_t)__p0, 3); return __ret; } #else -__ai poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); +__ai int64x1_t vcvtp_s64_f64(float64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vcvtp_s64_v((int8x8_t)__p0, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcvtpq_u64_v((int8x16_t)__p0, 51); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcvtpq_u64_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif -#ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_p128(poly128_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +#ifdef __LITTLE_ENDIAN__ +__ai uint64x1_t vcvtp_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcvtp_u64_v((int8x8_t)__p0, 19); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_p128(poly128_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai uint64x1_t vcvtp_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcvtp_u64_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_p64(poly64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_p64(poly64x1_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_p64(poly64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_p64(poly64x1_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); 
+__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_f64(float64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_f64(float64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_f64(float64x1_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_f64(float64x1_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return 
__ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); +__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_p128(poly128_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_p8(poly8x8_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_p128(poly128_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_p8(poly8x8_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_p64(poly64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_p16(poly16x4_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_p64(poly64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_p16(poly16x4_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_u8(uint8x8_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_u8(uint8x8_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_u32(uint32x2_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_u32(uint32x2_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_u64(uint64x1_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_u64(uint64x1_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { 
- uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_u16(uint16x4_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_u16(uint16x4_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_s8(int8x8_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_s8(int8x8_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_f64(float64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_f64(float64x1_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_f64(float64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_f64(float64x1_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_f32(float32x2_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_f32(float32x2_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_f16(float16x4_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_f16(float16x4_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_s32(int32x2_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_s32(int32x2_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_s64(int64x1_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_s64(int64x1_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t 
vreinterpret_p64_s16(int16x4_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #else -__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); +__ai poly64x1_t vreinterpret_p64_s16(int16x4_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_p128(poly128_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_p64(poly64x1_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_p128(poly128_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_p64(poly64x1_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_p64(poly64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_p64(poly64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { + poly16x4_t __ret; + __ret = 
(poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_f64(float64x1_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_f64(float64x1_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_f64(float64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_f64(float64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #else -__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_p128(poly128_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint32x4_t 
vreinterpretq_u32_s16(int16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_p128(poly128_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_p64(poly64x2_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_p64(poly64x2_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_p128(poly128_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_p128(poly128_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +#else +__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_p64(poly64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_p64(poly64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { - uint64x2_t 
__ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_f64(float64x2_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_f64(float64x2_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_f64(float64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_f64(float64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly128_t vreinterpretq_p128_p8(poly8x16_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); +__ai poly128_t 
vreinterpretq_p128_p8(poly8x16_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_p64(poly64x2_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_p64(poly64x2_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_p128(poly128_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_p16(poly16x8_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_p128(poly128_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_p16(poly16x8_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_p64(poly64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_u8(uint8x16_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_p64(poly64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_u8(uint8x16_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_u32(uint32x4_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_u32(uint32x4_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_u64(uint64x2_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_u64(uint64x2_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_u16(uint16x8_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_u16(uint16x8_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_s8(int8x16_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_s8(int8x16_t __p0) { + poly128_t __ret; + __ret = 
(poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_f64(float64x2_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_f64(float64x2_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_f64(float64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_f32(float32x4_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_f64(float64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_f32(float32x4_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_f16(float16x8_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_f16(float16x8_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_s32(int32x4_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_s32(int32x4_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_s64(int64x2_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_s64(int64x2_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_s16(int16x8_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly128_t vreinterpretq_p128_s16(int16x8_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_p8(poly8x16_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_p8(poly8x16_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ 
-__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_p128(poly128_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_p128(poly128_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_p128(poly128_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_p16(poly16x8_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_p128(poly128_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_p16(poly16x8_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_p64(poly64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_u8(uint8x16_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_p64(poly64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_u8(uint8x16_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_u32(uint32x4_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_u32(uint32x4_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_u64(uint64x2_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_u64(uint64x2_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_u16(uint16x8_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_u16(uint16x8_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_s8(int8x16_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_s8(int8x16_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); 
+__ai poly64x2_t vreinterpretq_p64_f64(float64x2_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_f64(float64x2_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_f64(float64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_f32(float32x4_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_f64(float64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_f32(float32x4_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_f16(float16x8_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_f16(float16x8_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_s32(int32x4_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_s32(int32x4_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_s64(int64x2_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_s64(int64x2_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_p8(poly8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_p128(poly128_t __p0) { + poly16x8_t __ret; + __ret = 
(poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_p8(poly8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_p128(poly128_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_p128(poly128_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_p64(poly64x2_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_p128(poly128_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_p64(poly64x2_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_p64(poly64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +#ifdef __LITTLE_ENDIAN__ +__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_p64(poly64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_p16(poly16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_p16(poly16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_u8(uint8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_u8(uint8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_u32(uint32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_u32(uint32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_u64(uint64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_u64(uint64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_u16(uint16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_f64(float64x2_t __p0) { + 
poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_u16(uint16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_f64(float64x2_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_s8(int8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_s8(int8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_f32(float32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_f32(float32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_f16(float16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_f16(float16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_s32(int32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_s32(int32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_s64(int64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_s64(int64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_p128(poly128_t __p0) { + uint8x16_t 
__ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_p128(poly128_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_p128(poly128_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_p64(poly64x2_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_p128(poly128_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_p64(poly64x2_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_p64(poly64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_p64(poly64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_f64(float64x2_t __p0) { + uint8x16_t __ret; + __ret = 
(uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_f64(float64x2_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_f64(float64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_f64(float64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_p128(poly128_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); 
return __ret; } #else -__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_p128(poly128_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_p128(poly128_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_p64(poly64x2_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_p128(poly128_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_p64(poly64x2_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_p64(poly64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_p64(poly64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_f64(float64x2_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return 
__ret; } #else -__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_f64(float64x2_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_f64(float64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_f64(float64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_p128(poly128_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else 
-__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_p128(poly128_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_p128(poly128_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_p64(poly64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_p128(poly128_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_p64(poly64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_p64(poly64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_p64(poly64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +#else +__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_f64(float64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { - int32x4_t __ret; - 
__ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_f64(float64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_f64(float64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_f64(float64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_p128(poly128_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_p128(poly128_t __p0) { + 
uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_p128(poly128_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_p64(poly64x2_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_p128(poly128_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_p64(poly64x2_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_p64(poly64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_p64(poly64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_f64(float64x2_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_f64(float64x2_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_f64(float64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_f64(float64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); +__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_p128(poly128_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_p128(poly128_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_p128(poly128_t __p0) { - int16x8_t __ret; - __ret = 
(int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_p64(poly64x2_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_p128(poly128_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_p64(poly64x2_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_p64(poly64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_p64(poly64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_f64(float64x2_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_f64(float64x2_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } 
#else -__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_f64(float64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_f64(float64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_p8(poly8x16_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_p8(poly8x16_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_p128(poly128_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_p128(poly128_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_p64(poly64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_p64(poly64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_p64(poly64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai 
float64x2_t vreinterpretq_f64_p64(poly64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_p16(poly16x8_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_p16(poly16x8_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif -#ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +#ifdef __LITTLE_ENDIAN__ +__ai float64x2_t vreinterpretq_f64_u8(uint8x16_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_u8(uint8x16_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_u32(uint32x4_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_u32(uint32x4_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_u64(uint64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_u64(uint64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_u16(uint16x8_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_u16(uint16x8_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_f64(float64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_s8(int8x16_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_f64(float64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_s8(int8x16_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_f32(float32x4_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_f32(float32x4_t 
__p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_f16(float16x8_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_f16(float16x8_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_s32(int32x4_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_s32(int32x4_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_s64(int64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_s64(int64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #else -__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); +__ai float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_p64(poly64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_p128(poly128_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_p64(poly64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_p128(poly128_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_p64(poly64x2_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_p64(poly64x2_t __p0) { + float32x4_t __ret; + __ret = 
(float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_f64(float64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_f64(float64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_f64(float64x2_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_f64(float64x2_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); 
return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); +__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_p64(poly64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_p64(poly64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_p128(poly128_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_p128(poly128_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_p64(poly64x2_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_p64(poly64x2_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_f64(float64x2_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_f64(float64x2_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ 
-__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); +__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_p64(poly64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_p64(poly64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_p128(poly128_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_p128(poly128_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_p64(poly64x2_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_p64(poly64x2_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { - 
uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +#else +__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_f64(float64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_f64(float64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_f64(float64x2_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_f64(float64x2_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_f32(float32x4_t 
__p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); +__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_p64(poly64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_p64(poly64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_p128(poly128_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_p128(poly128_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_p64(poly64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_p64(poly64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t 
vreinterpretq_s64_p16(poly16x8_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_f64(float64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_f64(float64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_f64(float64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); +__ai int64x2_t vreinterpretq_s64_f64(float64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_p8(poly8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_p8(poly8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t 
vreinterpret_f64_p64(poly64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_p64(poly64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_p16(poly16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_p16(poly16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_u8(uint8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_u8(uint8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_u32(uint32x2_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_u32(uint32x2_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_u64(uint64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_p128(poly128_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_u64(uint64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_p128(poly128_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_u16(uint16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_p64(poly64x2_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_u16(uint16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_p64(poly64x2_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_s8(int8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_s8(int8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_f32(float32x2_t __p0) { - float64x1_t __ret; - 
__ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_f32(float32x2_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_f16(float16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_f16(float16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_s32(int32x2_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_s32(int32x2_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_s64(int64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_s64(int64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_f64(float64x2_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_f64(float64x2_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_p64(poly64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_p64(poly64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t 
vreinterpretq_s16_f16(float16x8_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_p64(poly64x1_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_p64(poly64x1_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_f64(float64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_f64(float64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { + uint8x8_t __ret; + __ret = 
(uint8x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif -#ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +#ifdef __LITTLE_ENDIAN__ +__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_f64(float64x1_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); +__ai uint8x8_t vreinterpret_u8_f64(float64x1_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_p64(poly64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_p64(poly64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float16x4_t 
vreinterpret_f16_u8(uint8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_p64(poly64x1_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_p64(poly64x1_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_f64(float64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_f64(float64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { - float16x4_t 
__ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_f64(float64x1_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); +__ai uint32x2_t vreinterpret_u32_f64(float64x1_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_p64(poly64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_p64(poly64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { + 
uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_p64(poly64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_p64(poly64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_f64(float64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_f64(float64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t 
vreinterpret_s32_s16(int16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); +__ai uint64x1_t vreinterpret_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_p64(poly64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_p64(poly64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t 
vreinterpret_u16_p64(poly64x1_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_p64(poly64x1_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_f64(float64x1_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); +__ai uint16x4_t vreinterpret_u16_f64(float64x1_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai 
int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +#else +__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_p64(poly64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_p64(poly64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_p64(poly64x1_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_p64(poly64x1_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { + 
int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_f64(float64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_f64(float64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_f64(float64x1_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); +__ai int8x8_t vreinterpret_s8_f64(float64x1_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif -#endif -#if __ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING) #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrndq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 42); +__ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai float64x2_t vrndq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrnd_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 10); +__ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai float64x1_t vrnd_f64(float64x1_t 
__p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 10); +__ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrndaq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 42); +__ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai float64x2_t vrndaq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrnda_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 10); +__ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai float64x1_t vrnda_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 10); +__ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrndiq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 42); +__ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #else -__ai float64x2_t vrndiq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vrndiq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 41); +__ai float64x1_t vreinterpret_f64_p8(poly8x8_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float32x4_t vrndiq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float64x1_t vreinterpret_f64_p8(poly8x8_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrndi_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_p64(poly64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x1_t vrndi_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_p64(poly64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vrndi_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 9); +__ai float64x1_t 
vreinterpret_f64_p16(poly16x4_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float32x2_t vrndi_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndi_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64x1_t vreinterpret_f64_p16(poly16x4_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrndmq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 42); +__ai float64x1_t vreinterpret_f64_u8(uint8x8_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x2_t vrndmq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64x1_t vreinterpret_f64_u8(uint8x8_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrndm_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_u32(uint32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x1_t vrndm_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_u32(uint32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrndnq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 42); +__ai float64x1_t vreinterpret_f64_u64(uint64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x2_t vrndnq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64x1_t vreinterpret_f64_u64(uint64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrndn_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_u16(uint16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x1_t vrndn_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_u16(uint16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrndpq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 42); +__ai float64x1_t vreinterpret_f64_s8(int8x8_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x2_t vrndpq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64x1_t 
vreinterpret_f64_s8(int8x8_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrndp_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_f32(float32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x1_t vrndp_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_f32(float32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrndxq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 42); +__ai float64x1_t vreinterpret_f64_f16(float16x4_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x2_t vrndxq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64x1_t vreinterpret_f64_f16(float16x4_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrndx_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_s32(int32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x1_t vrndx_f64(float64x1_t __p0) { +__ai float64x1_t vreinterpret_f64_s32(int32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 10); + __ret = (float64x1_t)(__p0); return __ret; } #endif -#endif -#if __ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN) #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); +__ai float64x1_t vreinterpret_f64_s64(int64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64x1_t vreinterpret_f64_s64(int64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vmaxnm_f64(float64x1_t __p0, float64x1_t __p1) { +__ai float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = (float64x1_t)(__p0); return __ret; } #else -__ai float64x1_t vmaxnm_f64(float64x1_t __p0, float64x1_t __p1) { +__ai float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = (float64x1_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = (float64x2_t) 
__builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); +__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vminnm_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10); +__ai float32x2_t vreinterpret_f32_p64(poly64x1_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai float64x1_t vminnm_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10); +__ai float32x2_t vreinterpret_f32_p64(poly64x1_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif -#endif -#if __ARM_FEATURE_CRYPTO #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); +__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vaeseq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaeseq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); +__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint8x16_t vaeseq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaeseq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vaesimcq_u8(uint8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesimcq_v((int8x16_t)__p0, 48); +__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { + 
float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint8x16_t vaesimcq_u8(uint8x16_t __p0) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesimcq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vaesmcq_u8(uint8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesmcq_v((int8x16_t)__p0, 48); +__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint8x16_t vaesmcq_u8(uint8x16_t __p0) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesmcq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2); +__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vsha1h_u32(uint32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0); +__ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint32_t vsha1h_u32(uint32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0); +__ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2); +__ai float32x2_t vreinterpret_f32_f64(float64x1_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float32x2_t 
vreinterpret_f32_f64(float64x1_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2); +__ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif -#ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1su0q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); +#ifdef __LITTLE_ENDIAN__ +__ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1su1q_v((int8x16_t)__p0, (int8x16_t)__p1, 50); +__ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha256hq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); +__ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); return __ret; } #else -__ai uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = 
-  uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vsha256hq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) {
+  float32x2_t __ret;
+  __ret = (float32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vsha256h2q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
-  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vsha256h2q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vsha256su0q_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
+__ai float16x4_t vreinterpret_f16_p64(poly64x1_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) {
-  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vsha256su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_p64(poly64x1_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vsha256su1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
-  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vsha256su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
-#endif
-#if defined(__ARM_FEATURE_FMA)
 #ifdef __LITTLE_ENDIAN__
-__ai float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41);
+__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
-  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
-__ai float32x4_t __noswap_vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41);
+__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
-  float32x2_t __ret;
-  __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9);
+__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
-  float32x2_t __ret;
-  __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
-__ai float32x2_t __noswap_vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
-  float32x2_t __ret;
-  __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9);
+__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float32x4_t vfmsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
-  float32x4_t __ret;
-  __ret = vfmaq_f32(__p0, -__p1, __p2);
+__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float32x4_t vfmsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
-  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  float32x4_t __ret;
-  __ret = __noswap_vfmaq_f32(__rev0, -__rev1, __rev2);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
-  float32x2_t __ret;
-  __ret = vfma_f32(__p0, -__p1, __p2);
+__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
-  float32x2_t __ret;
-  __ret = __noswap_vfma_f32(__rev0, -__rev1, __rev2);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
-#endif
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+__ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vreinterpret_f16_f64(float64x1_t __p0) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_f64(float64x1_t __p0) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vabsq_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 40);
+__ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x8_t vabsq_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vabs_f16(float16x4_t __p0) {
+__ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vabs_v((int8x8_t)__p0, 8);
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x4_t vabs_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 8);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vaddq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = __p0 + __p1;
+__ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x8_t vaddq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = __rev0 + __rev1;
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) {
+  float16x4_t __ret;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vadd_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) {
   float16x4_t __ret;
-  __ret = __p0 + __p1;
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x4_t vadd_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+__ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) {
   float16x4_t __ret;
-  __ret = __rev0 + __rev1;
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  __ret = (float16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40);
+__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
-  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8);
+__ai int32x2_t vreinterpret_s32_p64(poly64x1_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
-  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_p64(poly64x1_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
+__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
+__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcage_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
+__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
+__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcagt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
+__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
+__ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcale_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
+__ai int32x2_t vreinterpret_s32_f64(float64x1_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_f64(float64x1_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
+__ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcalt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vceqq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0 == __p1);
+__ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vceqq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__rev0 == __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0 == __p1);
+__ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__rev0 == __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vceqzq_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49);
+__ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vceqzq_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) {
+  int32x2_t __ret;
+  __ret = (int32x2_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vceqz_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17);
+__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vceqz_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0 >= __p1);
+__ai int64x1_t vreinterpret_s64_p64(poly64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__rev0 >= __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_p64(poly64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0 >= __p1);
+__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__rev0 >= __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcgezq_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 49);
+__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcgezq_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcgez_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 17);
+__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vcgez_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+#else
+__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0 > __p1);
+__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__rev0 > __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0 > __p1);
+__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__rev0 > __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcgtzq_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 49);
+__ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcgtzq_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcgtz_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 17);
+__ai int64x1_t vreinterpret_s64_f64(float64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcgtz_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_f64(float64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0 <= __p1);
+__ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__rev0 <= __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0 <= __p1);
+__ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__rev0 <= __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vclezq_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 49);
+__ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vclezq_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vclez_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__p0, 17);
+__ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vclez_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0 < __p1);
+__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__rev0 < __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0 < __p1);
+__ai int16x4_t vreinterpret_s16_p64(poly64x1_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__rev0 < __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int16x4_t vreinterpret_s16_p64(poly64x1_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcltzq_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 49);
+__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcltzq_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcltz_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 17);
+__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vcltz_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vcvtq_f16_u16(uint16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vcvtq_f16_v((int8x16_t)__p0, 49);
+__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x8_t vcvtq_f16_u16(uint16x8_t __p0) {
-  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vcvtq_f16_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vcvtq_f16_s16(int16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vcvtq_f16_v((int8x16_t)__p0, 33);
+__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x8_t vcvtq_f16_s16(int16x8_t __p0) {
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vcvtq_f16_v((int8x16_t)__rev0, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vcvt_f16_u16(uint16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x8_t)__p0, 17);
+__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x4_t vcvt_f16_u16(uint16x4_t __p0) {
-  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vcvt_f16_s16(int16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x8_t)__p0, 1);
+__ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #else
-__ai float16x4_t vcvt_f16_s16(int16x4_t __p0) {
-  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x8_t)__rev0, 1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vcvtq_n_f16_u16(__p0, __p1) __extension__ ({ \
-  uint16x8_t __s0 = __p0; \
-  float16x8_t __ret; \
-  __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_v((int8x16_t)__s0, __p1, 49); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_f64(float64x1_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #else
-#define vcvtq_n_f16_u16(__p0, __p1) __extension__ ({ \
-  uint16x8_t __s0 = __p0; \
-  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __ret; \
-  __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_v((int8x16_t)__rev0, __p1, 49); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_f64(float64x1_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vcvtq_n_f16_s16(__p0, __p1) __extension__ ({ \
-  int16x8_t __s0 = __p0; \
-  float16x8_t __ret; \
-  __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_v((int8x16_t)__s0, __p1, 33); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #else
-#define vcvtq_n_f16_s16(__p0, __p1) __extension__ ({ \
-  int16x8_t __s0 = __p0; \
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __ret; \
-  __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_v((int8x16_t)__rev0, __p1, 33); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vcvt_n_f16_u16(__p0, __p1) __extension__ ({ \
-  uint16x4_t __s0 = __p0; \
-  float16x4_t __ret; \
-  __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_v((int8x8_t)__s0, __p1, 17); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #else
-#define vcvt_n_f16_u16(__p0, __p1) __extension__ ({ \
-  uint16x4_t __s0 = __p0; \
-  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16x4_t __ret; \
-  __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_v((int8x8_t)__rev0, __p1, 17); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vcvt_n_f16_s16(__p0, __p1) __extension__ ({ \
-  int16x4_t __s0 = __p0; \
-  float16x4_t __ret; \
-  __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_v((int8x8_t)__s0, __p1, 1); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #else
-#define vcvt_n_f16_s16(__p0, __p1) __extension__ ({ \
-  int16x4_t __s0 = __p0; \
-  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16x4_t __ret; \
-  __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_v((int8x8_t)__rev0, __p1, 1); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vcvtq_n_s16_f16(__p0, __p1) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vcvtq_n_s16_v((int8x16_t)__s0, __p1, 33); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #else
-#define vcvtq_n_s16_f16(__p0, __p1) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  int16x8_t __ret; \
-  __ret = (int16x8_t) __builtin_neon_vcvtq_n_s16_v((int8x16_t)__rev0, __p1, 33); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t)(__p0);
+  return __ret;
+}
 #endif
-#ifdef __LITTLE_ENDIAN__
-#define vcvt_n_s16_f16(__p0, __p1) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  int16x4_t __ret; \
-  __ret = (int16x4_t) __builtin_neon_vcvt_n_s16_v((int8x8_t)__s0, __p1, 1); \
-  __ret; \
-})
-#else
-#define vcvt_n_s16_f16(__p0, __p1) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  int16x4_t __ret; \
-  __ret = (int16x4_t) __builtin_neon_vcvt_n_s16_v((int8x8_t)__rev0, __p1, 1); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
 #endif
-
+#if __ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
 #ifdef __LITTLE_ENDIAN__
-#define vcvtq_n_u16_f16(__p0, __p1) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vcvtq_n_u16_v((int8x16_t)__s0, __p1, 49); \
-  __ret; \
-})
+__ai float64x2_t vrndq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 42);
+  return __ret;
+}
 #else
-#define vcvtq_n_u16_f16(__p0, __p1) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  uint16x8_t __ret; \
-  __ret = (uint16x8_t) __builtin_neon_vcvtq_n_u16_v((int8x16_t)__rev0, __p1, 49); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai float64x2_t vrndq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vcvt_n_u16_f16(__p0, __p1) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  uint16x4_t __ret; \
-  __ret = (uint16x4_t) __builtin_neon_vcvt_n_u16_v((int8x8_t)__s0, __p1, 17); \
-  __ret; \
-})
+__ai float64x1_t vrnd_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 10);
+  return __ret;
+}
 #else
-#define vcvt_n_u16_f16(__p0, __p1) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  uint16x4_t __ret; \
-  __ret = (uint16x4_t) __builtin_neon_vcvt_n_u16_v((int8x8_t)__rev0, __p1, 17); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai float64x1_t vrnd_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 10);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vcvtq_s16_f16(float16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtq_s16_v((int8x16_t)__p0, 33);
+__ai float64x2_t vrndaq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 42);
   return __ret;
 }
 #else
-__ai int16x8_t vcvtq_s16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtq_s16_v((int8x16_t)__rev0, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vrndaq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vcvt_s16_f16(float16x4_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvt_s16_v((int8x8_t)__p0, 1);
+__ai float64x1_t vrnda_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #else
-__ai int16x4_t vcvt_s16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvt_s16_v((int8x8_t)__rev0, 1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vrnda_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcvtq_u16_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_v((int8x16_t)__p0, 49);
+__ai float64x2_t vrndiq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 42);
   return __ret;
 }
 #else
-__ai uint16x8_t vcvtq_u16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vrndiq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcvt_u16_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvt_u16_v((int8x8_t)__p0, 17);
+__ai float64x1_t vrndi_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #else
-__ai uint16x4_t vcvt_u16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvt_u16_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vrndi_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vcvtaq_s16_f16(float16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtaq_s16_v((int8x16_t)__p0, 33);
+__ai float64x2_t vrndmq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 42);
   return __ret;
 }
 #else
-__ai int16x8_t vcvtaq_s16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtaq_s16_v((int8x16_t)__rev0, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vrndmq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vcvta_s16_f16(float16x4_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvta_s16_v((int8x8_t)__p0, 1);
-  return __ret;
-}
-#else
-__ai int16x4_t vcvta_s16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvta_s16_v((int8x8_t)__rev0, 1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vrndm_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 10);
+  return __ret;
+}
+#else
+__ai float64x1_t vrndm_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_v((int8x16_t)__p0, 49);
+__ai float64x2_t vrndnq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 42);
   return __ret;
 }
 #else
-__ai uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vrndnq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcvta_u16_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvta_u16_v((int8x8_t)__p0, 17);
+__ai float64x1_t vrndn_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #else
-__ai uint16x4_t vcvta_u16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvta_u16_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vrndn_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vcvtmq_s16_f16(float16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_v((int8x16_t)__p0, 33);
+__ai float64x2_t vrndpq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 42);
   return __ret;
 }
 #else
-__ai int16x8_t vcvtmq_s16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_v((int8x16_t)__rev0, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vrndpq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vcvtm_s16_f16(float16x4_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvtm_s16_v((int8x8_t)__p0, 1);
+__ai float64x1_t vrndp_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #else
-__ai int16x4_t vcvtm_s16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvtm_s16_v((int8x8_t)__rev0, 1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vrndp_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_v((int8x16_t)__p0, 49);
+__ai float64x2_t vrndxq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 42);
   return __ret;
 }
 #else
-__ai uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vrndxq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcvtm_u16_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvtm_u16_v((int8x8_t)__p0, 17);
+__ai float64x1_t vrndx_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #else
-__ai uint16x4_t vcvtm_u16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvtm_u16_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vrndx_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #endif
+#endif
+#if __ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)
 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vcvtnq_s16_f16(float16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_v((int8x16_t)__p0, 33);
+__ai float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
   return __ret;
 }
 #else
-__ai int16x8_t vcvtnq_s16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_v((int8x16_t)__rev0, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vcvtn_s16_f16(float16x4_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvtn_s16_v((int8x8_t)__p0, 1);
+__ai float64x1_t vmaxnm_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
 #else
-__ai int16x4_t vcvtn_s16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvtn_s16_v((int8x8_t)__rev0, 1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vmaxnm_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_v((int8x16_t)__p0, 49);
+__ai float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
   return __ret;
 }
 #else
-__ai uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcvtn_u16_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_v((int8x8_t)__p0, 17);
+__ai float64x1_t vminnm_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
 #else
-__ai uint16x4_t vcvtn_u16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vminnm_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
 #endif
+#endif
+#if defined(__ARM_FEATURE_DOTPROD)
 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vcvtpq_s16_f16(float16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_v((int8x16_t)__p0, 33);
+__ai uint32x4_t vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vdotq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
   return __ret;
 }
 #else
-__ai int16x8_t vcvtpq_s16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_v((int8x16_t)__rev0, 33);
+#if defined(__ARM_FEATURE_DOTPROD)
 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vcvtpq_s16_f16(float16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_v((int8x16_t)__p0, 33);
+__ai uint32x4_t vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vdotq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
   return __ret;
 }
 #else
-__ai int16x8_t vcvtpq_s16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_v((int8x16_t)__rev0, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint32x4_t vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vdotq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+__ai uint32x4_t __noswap_vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vdotq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vcvtp_s16_f16(float16x4_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvtp_s16_v((int8x8_t)__p0, 1);
+__ai int32x4_t vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) {
+  int32x4_t __ret;
+  __ret = (int32x4_t) __builtin_neon_vdotq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34);
   return __ret;
 }
 #else
-__ai int16x4_t vcvtp_s16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x4_t __ret;
-  __ret = (int16x4_t) __builtin_neon_vcvtp_s16_v((int8x8_t)__rev0, 1);
+__ai int32x4_t vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) {
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  int32x4_t __ret;
+  __ret = (int32x4_t) __builtin_neon_vdotq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
+__ai int32x4_t __noswap_vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) {
+  int32x4_t __ret;
+  __ret = (int32x4_t) __builtin_neon_vdotq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_v((int8x16_t)__p0, 49);
+__ai uint32x2_t vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) {
+  uint32x2_t __ret;
+  __ret = (uint32x2_t) __builtin_neon_vdot_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18);
   return __ret;
 }
 #else
-__ai uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint32x2_t vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) {
+  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint32x2_t __ret;
+  __ret = (uint32x2_t) __builtin_neon_vdot_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 18);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
+__ai uint32x2_t __noswap_vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) {
+  uint32x2_t __ret;
+  __ret = (uint32x2_t) __builtin_neon_vdot_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcvtp_u16_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_v((int8x8_t)__p0, 17);
+__ai int32x2_t vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) {
+  int32x2_t __ret;
+  __ret = (int32x2_t) __builtin_neon_vdot_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2);
   return __ret;
 }
 #else
-__ai uint16x4_t vcvtp_u16_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int32x2_t vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) {
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+  int32x2_t __ret;
+  __ret = (int32x2_t) __builtin_neon_vdot_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
+__ai int32x2_t __noswap_vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) {
+  int32x2_t __ret;
+  __ret = (int32x2_t) __builtin_neon_vdot_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vextq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x8_t __ret; \
-  __ret = (float16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 40); \
+#define vdotq_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
+  uint8x8_t __s2 = __p2; \
+  uint32x4_t __ret; \
+uint8x8_t __reint = __s2; \
+uint32x4_t __reint1 = __builtin_shufflevector(*(uint32x2_t *) &__reint, *(uint32x2_t *) &__reint, __p3, __p3, __p3, __p3); \
+  __ret = vdotq_u32(__s0, __s1, *(uint8x16_t *) &__reint1); \
   __ret; \
 })
 #else
-#define vextq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __ret; \
-  __ret = (float16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 40); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
+#define vdotq_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
+  uint8x8_t __s2 = __p2; \
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint32x4_t __ret; \
+uint8x8_t __reint = __rev2; \
+uint32x4_t __reint1 = __builtin_shufflevector(*(uint32x2_t *) &__reint, *(uint32x2_t *) &__reint, __p3, __p3, __p3, __p3); \
+  __ret = __noswap_vdotq_u32(__rev0, __rev1, *(uint8x16_t *) &__reint1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vext_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x4_t __ret; \
-  __ret = (float16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 8); \
+#define vdotq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
+  int8x8_t __s2 = __p2; \
+  int32x4_t __ret; \
+int8x8_t __reint = __s2; \
+int32x4_t __reint1 = __builtin_shufflevector(*(uint32x2_t *) &__reint, *(uint32x2_t *) &__reint, __p3, __p3, __p3, __p3); \
+  __ret = vdotq_s32(__s0, __s1, *(int8x16_t *) &__reint1); \
   __ret; \
 })
 #else
-#define vext_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  float16x4_t __ret; \
-  __ret = (float16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 8); \
+#define vdotq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
+  int8x8_t __s2 = __p2; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+int8x8_t __reint = __rev2; \
+int32x4_t __reint1 = __builtin_shufflevector(*(uint32x2_t *) &__reint, *(uint32x2_t *) &__reint, __p3, __p3, __p3, __p3); \
+  __ret = __noswap_vdotq_s32(__rev0, __rev1, *(int8x16_t *) &__reint1); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40);
-  return __ret;
-}
+#define vdot_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x2_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
+  uint8x8_t __s2 = __p2; \
+  uint32x2_t __ret; \
+uint8x8_t __reint = __s2; \
+uint32x2_t __reint1 = __builtin_shufflevector(*(uint32x2_t *) &__reint, *(uint32x2_t *) &__reint, __p3, __p3); \
+  __ret = vdot_u32(__s0, __s1, *(uint8x8_t *) &__reint1); \
+  __ret; \
+})
 #else
-__ai float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
-__ai float16x8_t __noswap_vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40);
-  return __ret;
-}
+#define vdot_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x2_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
+  uint8x8_t __s2 = __p2; \
+  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint32x2_t __ret; \
+uint8x8_t __reint = __rev2; \
+uint32x2_t __reint1 = __builtin_shufflevector(*(uint32x2_t *) &__reint, *(uint32x2_t *) &__reint, __p3, __p3); \
+  __ret = __noswap_vdot_u32(__rev0, __rev1, *(uint8x8_t *) &__reint1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8);
-  return __ret;
-}
+#define vdot_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
+  int8x8_t __s2 = __p2; \
+  int32x2_t __ret; \
+int8x8_t __reint = __s2; \
+int32x2_t __reint1 = __builtin_shufflevector(*(uint32x2_t *) &__reint, *(uint32x2_t *) &__reint, __p3, __p3); \
+  __ret = vdot_s32(__s0, __s1, *(int8x8_t *) &__reint1); \
+  __ret; \
+})
 #else
-__ai float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vfma_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
-__ai float16x4_t __noswap_vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8);
-  return __ret;
-}
+#define vdot_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
+  int8x8_t __s2 = __p2; \
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int32x2_t __ret; \
+int8x8_t __reint = __rev2; \
+int32x2_t __reint1 = __builtin_shufflevector(*(uint32x2_t *) &__reint, *(uint32x2_t *) &__reint, __p3, __p3); \
+  __ret = __noswap_vdot_s32(__rev0, __rev1, *(int8x8_t *) &__reint1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif
-#ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vfmsq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
-  float16x8_t __ret;
-  __ret = vfmaq_f16(__p0, -__p1, __p2);
-  return __ret;
-}
-#else
-__ai float16x8_t vfmsq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = __noswap_vfmaq_f16(__rev0, -__rev1, __rev2);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
 #endif
-
+#if defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)
 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vfms_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
-  float16x4_t __ret;
-  __ret = vfma_f16(__p0, -__p1, __p2);
-  return __ret;
-}
+#define vdotq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
+  uint8x16_t __s2 = __p2; \
+  uint32x4_t __ret; \
+uint8x16_t __reint = __s2; \
+uint32x4_t __reint1 = __builtin_shufflevector(*(uint32x4_t *) &__reint, *(uint32x4_t *) &__reint, __p3, __p3, __p3, __p3); \
+  __ret = vdotq_u32(__s0, __s1, *(uint8x16_t *) &__reint1); \
+  __ret; \
+})
 #else
-__ai float16x4_t vfms_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = __noswap_vfma_f16(__rev0, -__rev1, __rev2);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
+#define vdotq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint8x16_t __s1 = __p1; \
+  uint8x16_t __s2 = __p2; \
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint32x4_t __ret; \
+uint8x16_t __reint = __rev2; \
+uint32x4_t __reint1 = __builtin_shufflevector(*(uint32x4_t *) &__reint, *(uint32x4_t *) &__reint, __p3, __p3, __p3, __p3); \
+  __ret = __noswap_vdotq_u32(__rev0, __rev1, *(uint8x16_t *) &__reint1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
-  return __ret;
-}
+#define vdotq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
+  int8x16_t __s2 = __p2; \
+  int32x4_t __ret; \
+int8x16_t __reint = __s2; \
+int32x4_t __reint1 = __builtin_shufflevector(*(uint32x4_t *) &__reint, *(uint32x4_t *) &__reint, __p3, __p3, __p3, __p3); \
+  __ret = vdotq_s32(__s0, __s1, *(int8x16_t *) &__reint1); \
+  __ret; \
+})
 #else
-__ai float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vdotq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int8x16_t __s1 = __p1; \
+  int8x16_t __s2 = __p2; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+int8x16_t __reint = __rev2; \
+int32x4_t __reint1 = __builtin_shufflevector(*(uint32x4_t *) &__reint, *(uint32x4_t *) &__reint, __p3, __p3, __p3, __p3); \
+  __ret = __noswap_vdotq_s32(__rev0, __rev1, *(int8x16_t *) &__reint1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
-  return __ret;
-}
+#define vdot_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x2_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
+  uint8x16_t __s2 = __p2; \
+  uint32x2_t __ret; \
+uint8x16_t __reint = __s2; \
+uint32x2_t __reint1 = __builtin_shufflevector(*(uint32x4_t *) &__reint, *(uint32x4_t *) &__reint, __p3, __p3); \
+  __ret = vdot_u32(__s0, __s1, *(uint8x8_t *) &__reint1); \
+  __ret; \
+})
 #else
-__ai float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
+#define vdot_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x2_t __s0 = __p0; \
+  uint8x8_t __s1 = __p1; \
+  uint8x16_t __s2 = __p2; \
+  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint32x2_t __ret; \
+uint8x16_t __reint = __rev2; \
+uint32x2_t __reint1 = __builtin_shufflevector(*(uint32x4_t *) &__reint, *(uint32x4_t *) &__reint, __p3, __p3); \
+  __ret = __noswap_vdot_u32(__rev0, __rev1, *(uint8x8_t *) &__reint1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
-  return __ret;
-}
+#define vdot_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
+  int8x16_t __s2 = __p2; \
+  int32x2_t __ret; \
+int8x16_t __reint = __s2; \
+int32x2_t __reint1 = __builtin_shufflevector(*(uint32x4_t *) &__reint, *(uint32x4_t *) &__reint, __p3, __p3); \
+  __ret = vdot_s32(__s0, __s1, *(int8x8_t *) &__reint1); \
+  __ret; \
+})
 #else
-__ai float16x8_t vmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vdot_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int8x8_t __s1 = __p1; \
+  int8x16_t __s2 = __p2; \
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int32x2_t __ret; \
+int8x16_t __reint = __rev2; \
+int32x2_t __reint1 = __builtin_shufflevector(*(uint32x4_t *) &__reint, *(uint32x4_t *) &__reint, __p3, __p3); \
+  __ret = __noswap_vdot_s32(__rev0, __rev1, *(int8x8_t *) &__reint1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif
+#endif
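The vdot/vdotq intrinsics added above accumulate four 8-bit products into each 32-bit lane; the _lane/_laneq macros broadcast one 32-bit group of the final operand before the same accumulation. A usage sketch, assuming a target built with dot-product support (e.g. -march=armv8.2-a+dotprod):

```c
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint8x8_t  a   = vdup_n_u8(2);          /* eight lanes of 2 */
    uint8x8_t  b   = vdup_n_u8(3);          /* eight lanes of 3 */
    uint32x2_t acc = vdup_n_u32(100);
    /* Each 32-bit lane i becomes acc[i] + sum of a[4i+j]*b[4i+j], j=0..3:
       100 + 4*(2*3) = 124. */
    uint32x2_t r = vdot_u32(acc, a, b);
    printf("%u %u\n", vget_lane_u32(r, 0), vget_lane_u32(r, 1));
    return 0;
}
```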
+#if defined(__ARM_FEATURE_FMA)
 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vmaxnm_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41);
   return __ret;
 }
 #else
-__ai float16x4_t vmaxnm_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+__ai float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vminq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
-  return __ret;
-}
-#else
-__ai float16x8_t vminq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float32x4_t __noswap_vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9);
   return __ret;
 }
 #else
-__ai float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vminnmq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+__ai float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
-#else
-__ai float16x8_t vminnmq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float32x2_t __noswap_vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai float32x4_t vfmaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) {
+  float32x4_t __ret;
+  __ret = vfmaq_f32(__p0, __p1, (float32x4_t) {__p2, __p2, __p2, __p2});
   return __ret;
 }
 #else
-__ai float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+__ai float32x4_t vfmaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = __noswap_vfmaq_f32(__rev0, __rev1, (float32x4_t) {__p2, __p2, __p2, __p2});
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vmulq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = __p0 * __p1;
+__ai float32x2_t vfma_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) {
+  float32x2_t __ret;
+  __ret = vfma_f32(__p0, __p1, (float32x2_t) {__p2, __p2});
   return __ret;
 }
 #else
-__ai float16x8_t vmulq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = __rev0 * __rev1;
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float32x2_t vfma_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float32x2_t __ret;
+  __ret = __noswap_vfma_f32(__rev0, __rev1, (float32x2_t) {__p2, __p2});
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = __p0 * __p1;
+__ai float32x4_t vfmsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
+  float32x4_t __ret;
+  __ret = vfmaq_f32(__p0, -__p1, __p2);
   return __ret;
 }
 #else
-__ai float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = __rev0 * __rev1;
+__ai float32x4_t vfmsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = __noswap_vfmaq_f32(__rev0, -__rev1, __rev2);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulq_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x8_t __ret; \
-  __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \
-  __ret; \
-})
-#else
-#define vmulq_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  float16x8_t __ret; \
-  __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vmul_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x4_t __ret; \
-  __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \
-  __ret; \
-})
-#else
-#define vmul_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  float16x4_t __ret; \
-  __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vmulq_n_f16(__p0, __p1) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16_t __s1 = __p1; \
-  float16x8_t __ret; \
-  __ret = __s0 * (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}; \
-  __ret; \
-})
+__ai float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
+  float32x2_t __ret;
+  __ret = vfma_f32(__p0, -__p1, __p2);
+  return __ret;
+}
 #else
-#define vmulq_n_f16(__p0, __p1) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16_t __s1 = __p1; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __ret; \
-  __ret = __rev0 * (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}; \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+  float32x2_t __ret;
+  __ret = __noswap_vfma_f32(__rev0, -__rev1, __rev2);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
 #endif
-#ifdef __LITTLE_ENDIAN__
-#define vmul_n_f16(__p0, __p1) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16_t __s1 = __p1; \
-  float16x4_t __ret; \
-  __ret = __s0 * (float16x4_t) {__s1, __s1, __s1, __s1}; \
-  __ret; \
-})
-#else
-#define vmul_n_f16(__p0, __p1) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16_t __s1 = __p1; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16x4_t __ret; \
-  __ret = __rev0 * (float16x4_t) {__s1, __s1, __s1, __s1}; \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
 #endif
-
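As the hunks above make explicit, vfmaq_f32(a, b, c) computes a + b*c with a single rounding step, and vfmsq_f32 is defined as vfmaq_f32(a, -b, c), i.e. a - b*c. A short sketch, assuming a target with __ARM_FEATURE_FMA (AArch64, or ARMv7 with VFPv4):

```c
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    float32x4_t a = vdupq_n_f32(10.0f);
    float32x4_t b = vdupq_n_f32(3.0f);
    float32x4_t c = vdupq_n_f32(4.0f);
    float32x4_t add = vfmaq_f32(a, b, c);  /* 10 + 3*4 = 22 in every lane */
    float32x4_t sub = vfmsq_f32(a, b, c);  /* 10 - 3*4 = -2 in every lane */
    printf("%f %f\n", vgetq_lane_f32(add, 0), vgetq_lane_f32(sub, 0));
    return 0;
}
```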
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vnegq_f16(float16x8_t __p0) {
+__ai float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __ret;
-  __ret = -__p0;
+  __ret = (float16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
   return __ret;
 }
 #else
-__ai float16x8_t vnegq_f16(float16x8_t __p0) {
+__ai float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = -__rev0;
+  __ret = (float16x8_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vneg_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = -__p0;
-  return __ret;
-}
-#else
-__ai float16x4_t vneg_f16(float16x4_t __p0) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = -__rev0;
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  __ret = (float16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
   return __ret;
 }
 #else
-__ai float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = (float16x4_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai float16x8_t vabsq_f16(float16x8_t __p0) {
+  float16x8_t __ret;
+  __ret = (float16x8_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 40);
   return __ret;
 }
 #else
-__ai float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float16x8_t vabsq_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __ret;
+  __ret = (float16x8_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 40);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vabs_f16(float16x4_t __p0) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  __ret = (float16x4_t) __builtin_neon_vabs_v((int8x8_t)__p0, 8);
   return __ret;
 }
 #else
-__ai float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vabs_f16(float16x4_t __p0) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = (float16x4_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 8);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrecpeq_f16(float16x8_t __p0) {
+__ai float16x8_t vaddq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 40);
+  __ret = __p0 + __p1;
   return __ret;
 }
 #else
-__ai float16x8_t vrecpeq_f16(float16x8_t __p0) {
+__ai float16x8_t vaddq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 40);
+  __ret = __rev0 + __rev1;
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrecpe_f16(float16x4_t __p0) {
+__ai float16x4_t vadd_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 8);
+  __ret = __p0 + __p1;
   return __ret;
 }
 #else
-__ai float16x4_t vrecpe_f16(float16x4_t __p0) {
+__ai float16x4_t vadd_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 8);
+  __ret = __rev0 + __rev1;
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+  __ret = (float16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40);
   return __ret;
 }
 #else
-__ai float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  __ret = (float16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  __ret = (float16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8);
   return __ret;
 }
 #else
-__ai float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+__ai float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
+  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrecps_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = (float16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrev64q_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4);
+__ai uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
   return __ret;
 }
 #else
-__ai float16x8_t vrev64q_f16(float16x8_t __p0) {
+__ai uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrev64_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+__ai uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
   return __ret;
 }
 #else
-__ai float16x4_t vrev64_f16(float16x4_t __p0) {
+__ai uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcage_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrndq_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 40);
+__ai uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
   return __ret;
 }
 #else
-__ai float16x8_t vrndq_f16(float16x8_t __p0) {
+__ai uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 40);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrnd_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 8);
+__ai uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
   return __ret;
 }
 #else
-__ai float16x4_t vrnd_f16(float16x4_t __p0) {
+__ai uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrnd_v((int8x8_t)__rev0, 8);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcagt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrndaq_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 40);
+__ai uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
   return __ret;
 }
 #else
-__ai float16x8_t vrndaq_f16(float16x8_t __p0) {
+__ai uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 40);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrnda_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 8);
+__ai uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
   return __ret;
 }
 #else
-__ai float16x4_t vrnda_f16(float16x4_t __p0) {
+__ai uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrnda_v((int8x8_t)__rev0, 8);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcale_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrndmq_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 40);
+__ai uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
   return __ret;
 }
 #else
-__ai float16x8_t vrndmq_f16(float16x8_t __p0) {
+__ai uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 40);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrndm_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 8);
+__ai uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
   return __ret;
 }
 #else
-__ai float16x4_t vrndm_f16(float16x4_t __p0) {
+__ai uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndm_v((int8x8_t)__rev0, 8);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcalt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrndnq_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 40);
+__ai uint16x8_t vceqq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__p0 == __p1);
   return __ret;
 }
 #else
-__ai float16x8_t vrndnq_f16(float16x8_t __p0) {
+__ai uint16x8_t vceqq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 40);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__rev0 == __rev1);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrndn_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 8);
+__ai uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__p0 == __p1);
   return __ret;
 }
 #else
-__ai float16x4_t vrndn_f16(float16x4_t __p0) {
+__ai uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndn_v((int8x8_t)__rev0, 8);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__rev0 == __rev1);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrndpq_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 40);
+__ai uint16x8_t vceqzq_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49);
   return __ret;
 }
 #else
-__ai float16x8_t vrndpq_f16(float16x8_t __p0) {
+__ai uint16x8_t vceqzq_f16(float16x8_t __p0) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 40);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrndp_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 8);
+__ai uint16x4_t vceqz_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17);
   return __ret;
 }
 #else
-__ai float16x4_t vrndp_f16(float16x4_t __p0) {
+__ai uint16x4_t vceqz_f16(float16x4_t __p0) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndp_v((int8x8_t)__rev0, 8);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrndxq_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 40);
+__ai uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__p0 >= __p1);
   return __ret;
 }
 #else
-__ai float16x8_t vrndxq_f16(float16x8_t __p0) {
+__ai uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 40);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__rev0 >= __rev1);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrndx_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 8);
+__ai uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__p0 >= __p1);
   return __ret;
 }
 #else
-__ai float16x4_t vrndx_f16(float16x4_t __p0) {
+__ai uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndx_v((int8x8_t)__rev0, 8);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__rev0 >= __rev1);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrsqrteq_f16(float16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 40);
+__ai uint16x8_t vcgezq_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 49);
   return __ret;
 }
 #else
-__ai float16x8_t vrsqrteq_f16(float16x8_t __p0) {
+__ai uint16x8_t vcgezq_f16(float16x8_t __p0) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 40);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 49);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrsqrte_f16(float16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 8);
+__ai uint16x4_t vcgez_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 17);
   return __ret;
 }
 #else
-__ai float16x4_t vrsqrte_f16(float16x4_t __p0) {
+__ai uint16x4_t vcgez_f16(float16x4_t __p0) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 8);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 17);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrsqrtsq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+__ai uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__p0 > __p1);
   return __ret;
 }
 #else
-__ai float16x8_t vrsqrtsq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__rev0 > __rev1);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__p0 > __p1);
   return __ret;
 }
 #else
-__ai float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrsqrts_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__rev0 > __rev1);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vsubq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = __p0 - __p1;
+__ai uint16x8_t vcgtzq_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 49);
   return __ret;
 }
 #else
-__ai float16x8_t vsubq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai uint16x8_t vcgtzq_f16(float16x8_t __p0) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = __rev0 - __rev1;
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 49);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vsub_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = __p0 - __p1;
+__ai uint16x4_t vcgtz_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 17);
   return __ret;
 }
 #else
-__ai float16x4_t vsub_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai uint16x4_t vcgtz_f16(float16x4_t __p0) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = __rev0 - __rev1;
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 17);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8x2_t __ret;
-  __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40);
+__ai uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__p0 <= __p1);
   return __ret;
 }
 #else
-__ai float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8x2_t __ret;
-  __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-
-  __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0);
-  __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__rev0 <= __rev1);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4x2_t __ret;
-  __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__p0 <= __p1);
   return __ret;
 }
 #else
-__ai float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4x2_t __ret;
-  __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8);
-
-  __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0);
-  __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__rev0 <= __rev1);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8x2_t __ret;
-  __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40);
+__ai uint16x8_t vclezq_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 49);
   return __ret;
 }
 #else
-__ai float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai uint16x8_t vclezq_f16(float16x8_t __p0) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8x2_t __ret;
-  __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-
-  __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0);
-  __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4x2_t __ret;
-  __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai uint16x4_t vclez_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__p0, 17);
   return __ret;
 }
 #else
-__ai float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai uint16x4_t vclez_f16(float16x4_t __p0) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4x2_t __ret;
-  __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8);
-
-  __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0);
-  __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 17);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8x2_t __ret;
-  __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40);
+__ai uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__p0 < __p1);
   return __ret;
 }
 #else
-__ai float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8x2_t __ret;
-  __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-
-  __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0);
-  __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t)(__rev0 < __rev1);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4x2_t __ret;
-  __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t)(__p0 < __p1);
   return __ret;
 }
 #else
-__ai float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __rev1;
__rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); - - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t)(__rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif -#endif -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vdivq_f16(float16x8_t __p0, float16x8_t __p1) { - float16x8_t __ret; - __ret = __p0 / __p1; +__ai uint16x8_t vcltzq_f16(float16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 49); return __ret; } #else -__ai float16x8_t vdivq_f16(float16x8_t __p0, float16x8_t __p1) { +__ai uint16x8_t vcltzq_f16(float16x8_t __p0) { float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __ret; - __ret = __rev0 / __rev1; + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { - float16x4_t __ret; - __ret = __p0 / __p1; +__ai uint16x4_t vcltz_f16(float16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 17); return __ret; } #else -__ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { +__ai uint16x4_t vcltz_f16(float16x4_t __p0) { float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __ret; - __ret = __rev0 / __rev1; + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vduph_lane_f16(__p0, __p1) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vduph_lane_f16((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vduph_lane_f16(__p0, __p1) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vduph_lane_f16((int8x8_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vduph_laneq_f16((int8x16_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vduph_laneq_f16((int8x16_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) -#else -#define vfmah_lane_f16(__p0, __p1, 
__p2, __p3) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__rev2, __p3); \ - __ret; \ -}) -#define __noswap_vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 40); \ - __ret; \ -}) -#else -#define vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, __p3, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 40); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 8); \ - __ret; \ -}) -#else -#define vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vfma_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, __p3, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16x4_t __s2 = __p2; \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 8); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__s2, __p3); \ - __ret; \ -}) -#else -#define 
vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__rev2, __p3); \ - __ret; \ -}) -#define __noswap_vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__s2, __p3); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 40); \ - __ret; \ -}) -#else -#define vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 40); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 8); \ - __ret; \ -}) -#else -#define vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vfma_laneq_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x16_t)__rev2, __p3, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16x8_t __s2 = __p2; \ - float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 8); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmaq_n_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16_t __s2 = __p2; \ - float16x8_t __ret; \ - __ret = vfmaq_f16(__s0, __s1, (float16x8_t) {__s2, __s2, 
__s2, __s2, __s2, __s2, __s2, __s2}); \ - __ret; \ -}) -#else -#define vfmaq_n_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16_t __s2 = __p2; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret; \ - __ret = __noswap_vfmaq_f16(__rev0, __rev1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfma_n_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16_t __s2 = __p2; \ - float16x4_t __ret; \ - __ret = vfma_f16(__s0, __s1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ - __ret; \ -}) -#else -#define vfma_n_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16_t __s2 = __p2; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float16x4_t __ret; \ - __ret = __noswap_vfma_f16(__rev0, __rev1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmsh_lane_f16(__p0_0, __p1_0, __p2_0, __p3_0) __extension__ ({ \ - float16_t __s0_0 = __p0_0; \ - float16_t __s1_0 = __p1_0; \ - float16x4_t __s2_0 = __p2_0; \ - float16_t __ret_0; \ - __ret_0 = vfmah_lane_f16(__s0_0, -__s1_0, __s2_0, __p3_0); \ - __ret_0; \ -}) -#else -#define vfmsh_lane_f16(__p0_1, __p1_1, __p2_1, __p3_1) __extension__ ({ \ - float16_t __s0_1 = __p0_1; \ - float16_t __s1_1 = __p1_1; \ - float16x4_t __s2_1 = __p2_1; \ - float16x4_t __rev2_1; __rev2_1 = __builtin_shufflevector(__s2_1, __s2_1, 3, 2, 1, 0); \ - float16_t __ret_1; \ - __ret_1 = __noswap_vfmah_lane_f16(__s0_1, -__s1_1, __rev2_1, __p3_1); \ - __ret_1; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f16(__p0_2, __p1_2, __p2_2, __p3_2) __extension__ ({ \ - float16x8_t __s0_2 = __p0_2; \ - float16x8_t __s1_2 = __p1_2; \ - float16x4_t __s2_2 = __p2_2; \ - float16x8_t __ret_2; \ - __ret_2 = vfmaq_lane_f16(__s0_2, -__s1_2, __s2_2, __p3_2); \ - __ret_2; \ -}) -#else -#define vfmsq_lane_f16(__p0_3, __p1_3, __p2_3, __p3_3) __extension__ ({ \ - float16x8_t __s0_3 = __p0_3; \ - float16x8_t __s1_3 = __p1_3; \ - float16x4_t __s2_3 = __p2_3; \ - float16x8_t __rev0_3; __rev0_3 = __builtin_shufflevector(__s0_3, __s0_3, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_3; __rev1_3 = __builtin_shufflevector(__s1_3, __s1_3, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_3; __rev2_3 = __builtin_shufflevector(__s2_3, __s2_3, 3, 2, 1, 0); \ - float16x8_t __ret_3; \ - __ret_3 = __noswap_vfmaq_lane_f16(__rev0_3, -__rev1_3, __rev2_3, __p3_3); \ - __ret_3 = __builtin_shufflevector(__ret_3, __ret_3, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_3; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f16(__p0_4, __p1_4, __p2_4, __p3_4) __extension__ ({ \ - float16x4_t __s0_4 = __p0_4; \ - float16x4_t __s1_4 = __p1_4; \ - float16x4_t __s2_4 = __p2_4; \ - float16x4_t __ret_4; \ - __ret_4 = vfma_lane_f16(__s0_4, -__s1_4, __s2_4, __p3_4); \ - __ret_4; \ -}) +__ai float16x8_t vcvtq_f16_u16(uint16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvtq_f16_v((int8x16_t)__p0, 49); + 
return __ret; +} #else -#define vfms_lane_f16(__p0_5, __p1_5, __p2_5, __p3_5) __extension__ ({ \ - float16x4_t __s0_5 = __p0_5; \ - float16x4_t __s1_5 = __p1_5; \ - float16x4_t __s2_5 = __p2_5; \ - float16x4_t __rev0_5; __rev0_5 = __builtin_shufflevector(__s0_5, __s0_5, 3, 2, 1, 0); \ - float16x4_t __rev1_5; __rev1_5 = __builtin_shufflevector(__s1_5, __s1_5, 3, 2, 1, 0); \ - float16x4_t __rev2_5; __rev2_5 = __builtin_shufflevector(__s2_5, __s2_5, 3, 2, 1, 0); \ - float16x4_t __ret_5; \ - __ret_5 = __noswap_vfma_lane_f16(__rev0_5, -__rev1_5, __rev2_5, __p3_5); \ - __ret_5 = __builtin_shufflevector(__ret_5, __ret_5, 3, 2, 1, 0); \ - __ret_5; \ -}) +__ai float16x8_t vcvtq_f16_u16(uint16x8_t __p0) { + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvtq_f16_v((int8x16_t)__rev0, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsh_laneq_f16(__p0_6, __p1_6, __p2_6, __p3_6) __extension__ ({ \ - float16_t __s0_6 = __p0_6; \ - float16_t __s1_6 = __p1_6; \ - float16x8_t __s2_6 = __p2_6; \ - float16_t __ret_6; \ - __ret_6 = vfmah_laneq_f16(__s0_6, -__s1_6, __s2_6, __p3_6); \ - __ret_6; \ -}) +__ai float16x8_t vcvtq_f16_s16(int16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvtq_f16_v((int8x16_t)__p0, 33); + return __ret; +} #else -#define vfmsh_laneq_f16(__p0_7, __p1_7, __p2_7, __p3_7) __extension__ ({ \ - float16_t __s0_7 = __p0_7; \ - float16_t __s1_7 = __p1_7; \ - float16x8_t __s2_7 = __p2_7; \ - float16x8_t __rev2_7; __rev2_7 = __builtin_shufflevector(__s2_7, __s2_7, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret_7; \ - __ret_7 = __noswap_vfmah_laneq_f16(__s0_7, -__s1_7, __rev2_7, __p3_7); \ - __ret_7; \ -}) +__ai float16x8_t vcvtq_f16_s16(int16x8_t __p0) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvtq_f16_v((int8x16_t)__rev0, 33); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f16(__p0_8, __p1_8, __p2_8, __p3_8) __extension__ ({ \ - float16x8_t __s0_8 = __p0_8; \ - float16x8_t __s1_8 = __p1_8; \ - float16x8_t __s2_8 = __p2_8; \ - float16x8_t __ret_8; \ - __ret_8 = vfmaq_laneq_f16(__s0_8, -__s1_8, __s2_8, __p3_8); \ - __ret_8; \ -}) +__ai float16x4_t vcvt_f16_u16(uint16x4_t __p0) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x8_t)__p0, 17); + return __ret; +} #else -#define vfmsq_laneq_f16(__p0_9, __p1_9, __p2_9, __p3_9) __extension__ ({ \ - float16x8_t __s0_9 = __p0_9; \ - float16x8_t __s1_9 = __p1_9; \ - float16x8_t __s2_9 = __p2_9; \ - float16x8_t __rev0_9; __rev0_9 = __builtin_shufflevector(__s0_9, __s0_9, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_9; __rev1_9 = __builtin_shufflevector(__s1_9, __s1_9, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_9; __rev2_9 = __builtin_shufflevector(__s2_9, __s2_9, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret_9; \ - __ret_9 = __noswap_vfmaq_laneq_f16(__rev0_9, -__rev1_9, __rev2_9, __p3_9); \ - __ret_9 = __builtin_shufflevector(__ret_9, __ret_9, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_9; \ -}) +__ai float16x4_t vcvt_f16_u16(uint16x4_t __p0) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) 
__builtin_neon_vcvt_f16_v((int8x8_t)__rev0, 17); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f16(__p0_10, __p1_10, __p2_10, __p3_10) __extension__ ({ \ - float16x4_t __s0_10 = __p0_10; \ - float16x4_t __s1_10 = __p1_10; \ - float16x8_t __s2_10 = __p2_10; \ - float16x4_t __ret_10; \ - __ret_10 = vfma_laneq_f16(__s0_10, -__s1_10, __s2_10, __p3_10); \ - __ret_10; \ -}) +__ai float16x4_t vcvt_f16_s16(int16x4_t __p0) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x8_t)__p0, 1); + return __ret; +} #else -#define vfms_laneq_f16(__p0_11, __p1_11, __p2_11, __p3_11) __extension__ ({ \ - float16x4_t __s0_11 = __p0_11; \ - float16x4_t __s1_11 = __p1_11; \ - float16x8_t __s2_11 = __p2_11; \ - float16x4_t __rev0_11; __rev0_11 = __builtin_shufflevector(__s0_11, __s0_11, 3, 2, 1, 0); \ - float16x4_t __rev1_11; __rev1_11 = __builtin_shufflevector(__s1_11, __s1_11, 3, 2, 1, 0); \ - float16x8_t __rev2_11; __rev2_11 = __builtin_shufflevector(__s2_11, __s2_11, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __ret_11; \ - __ret_11 = __noswap_vfma_laneq_f16(__rev0_11, -__rev1_11, __rev2_11, __p3_11); \ - __ret_11 = __builtin_shufflevector(__ret_11, __ret_11, 3, 2, 1, 0); \ - __ret_11; \ -}) +__ai float16x4_t vcvt_f16_s16(int16x4_t __p0) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x8_t)__rev0, 1); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_n_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16_t __s2 = __p2; \ +#define vcvtq_n_f16_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ float16x8_t __ret; \ - __ret = vfmaq_f16(__s0, -__s1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ + __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else -#define vfmsq_n_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16_t __s2 = __p2; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcvtq_n_f16_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __ret; \ - __ret = __noswap_vfmaq_f16(__rev0, -__rev1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ + __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_v((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_n_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16_t __s2 = __p2; \ - float16x4_t __ret; \ - __ret = vfma_f16(__s0, -__s1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ +#define vcvtq_n_f16_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else -#define vfms_n_f16(__p0, __p1, __p2) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16_t __s2 = __p2; \ - float16x4_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float16x4_t __ret; \ - __ret = __noswap_vfma_f16(__rev0, -__rev1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vcvtq_n_f16_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_v((int8x16_t)__rev0, __p1, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmaxnmvq_f16(__p0) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmaxnmvq_f16((int8x16_t)__s0); \ +#define vcvt_n_f16_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else -#define vmaxnmvq_f16(__p0) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmaxnmvq_f16((int8x16_t)__rev0); \ +#define vcvt_n_f16_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_v((int8x8_t)__rev0, __p1, 17); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmaxnmv_f16(__p0) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmaxnmv_f16((int8x8_t)__s0); \ +#define vcvt_n_f16_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else -#define vmaxnmv_f16(__p0) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmaxnmv_f16((int8x8_t)__rev0); \ +#define vcvt_n_f16_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_v((int8x8_t)__rev0, __p1, 1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmaxvq_f16(__p0) __extension__ ({ \ +#define vcvtq_n_s16_f16(__p0, __p1) __extension__ ({ \ float16x8_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmaxvq_f16((int8x16_t)__s0); \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vcvtq_n_s16_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else -#define vmaxvq_f16(__p0) __extension__ ({ \ +#define vcvtq_n_s16_f16(__p0, __p1) __extension__ ({ \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmaxvq_f16((int8x16_t)__rev0); \ + int16x8_t __ret; \ + __ret = (int16x8_t) __builtin_neon_vcvtq_n_s16_v((int8x16_t)__rev0, __p1, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) 
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmaxv_f16(__p0) __extension__ ({ \
+#define vcvt_n_s16_f16(__p0, __p1) __extension__ ({ \
   float16x4_t __s0 = __p0; \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmaxv_f16((int8x8_t)__s0); \
+  int16x4_t __ret; \
+  __ret = (int16x4_t) __builtin_neon_vcvt_n_s16_v((int8x8_t)__s0, __p1, 1); \
   __ret; \
 })
 #else
-#define vmaxv_f16(__p0) __extension__ ({ \
+#define vcvt_n_s16_f16(__p0, __p1) __extension__ ({ \
   float16x4_t __s0 = __p0; \
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmaxv_f16((int8x8_t)__rev0); \
+  int16x4_t __ret; \
+  __ret = (int16x4_t) __builtin_neon_vcvt_n_s16_v((int8x8_t)__rev0, __p1, 1); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vminnmvq_f16(__p0) __extension__ ({ \
+#define vcvtq_n_u16_f16(__p0, __p1) __extension__ ({ \
   float16x8_t __s0 = __p0; \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vminnmvq_f16((int8x16_t)__s0); \
+  uint16x8_t __ret; \
+  __ret = (uint16x8_t) __builtin_neon_vcvtq_n_u16_v((int8x16_t)__s0, __p1, 49); \
   __ret; \
 })
 #else
-#define vminnmvq_f16(__p0) __extension__ ({ \
+#define vcvtq_n_u16_f16(__p0, __p1) __extension__ ({ \
   float16x8_t __s0 = __p0; \
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vminnmvq_f16((int8x16_t)__rev0); \
+  uint16x8_t __ret; \
+  __ret = (uint16x8_t) __builtin_neon_vcvtq_n_u16_v((int8x16_t)__rev0, __p1, 49); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vminnmv_f16(__p0) __extension__ ({ \
+#define vcvt_n_u16_f16(__p0, __p1) __extension__ ({ \
   float16x4_t __s0 = __p0; \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vminnmv_f16((int8x8_t)__s0); \
+  uint16x4_t __ret; \
+  __ret = (uint16x4_t) __builtin_neon_vcvt_n_u16_v((int8x8_t)__s0, __p1, 17); \
   __ret; \
 })
 #else
-#define vminnmv_f16(__p0) __extension__ ({ \
+#define vcvt_n_u16_f16(__p0, __p1) __extension__ ({ \
   float16x4_t __s0 = __p0; \
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vminnmv_f16((int8x8_t)__rev0); \
+  uint16x4_t __ret; \
+  __ret = (uint16x4_t) __builtin_neon_vcvt_n_u16_v((int8x8_t)__rev0, __p1, 17); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vminvq_f16(__p0) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vminvq_f16((int8x16_t)__s0); \
-  __ret; \
-})
+__ai int16x8_t vcvtq_s16_f16(float16x8_t __p0) {
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtq_s16_v((int8x16_t)__p0, 33);
+  return __ret;
+}
 #else
-#define vminvq_f16(__p0) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vminvq_f16((int8x16_t)__rev0); \
-  __ret; \
-})
+__ai int16x8_t vcvtq_s16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtq_s16_v((int8x16_t)__rev0, 33);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vminv_f16(__p0) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vminv_f16((int8x8_t)__s0); \
-  __ret; \
-})
+__ai int16x4_t vcvt_s16_f16(float16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvt_s16_v((int8x8_t)__p0, 1);
+  return __ret;
+}
 #else
-#define vminv_f16(__p0) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vminv_f16((int8x8_t)__rev0); \
-  __ret; \
-})
+__ai int16x4_t vcvt_s16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvt_s16_v((int8x8_t)__rev0, 1);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulq_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x8_t __ret; \
-  __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \
-  __ret; \
-})
+__ai uint16x8_t vcvtq_u16_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_v((int8x16_t)__p0, 49);
+  return __ret;
+}
 #else
-#define vmulq_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __ret; \
-  __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai uint16x8_t vcvtq_u16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_v((int8x16_t)__rev0, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmul_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x4_t __ret; \
-  __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \
-  __ret; \
-})
+__ai uint16x4_t vcvt_u16_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvt_u16_v((int8x8_t)__p0, 17);
+  return __ret;
+}
 #else
-#define vmul_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x4_t __ret; \
-  __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai uint16x4_t vcvt_u16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvt_u16_v((int8x8_t)__rev0, 17);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+__ai int16x8_t vcvtaq_s16_f16(float16x8_t __p0) {
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtaq_s16_v((int8x16_t)__p0, 33);
   return __ret;
 }
 #else
-__ai float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai int16x8_t vcvtaq_s16_f16(float16x8_t __p0) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtaq_s16_v((int8x16_t)__rev0, 33);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
-__ai float16x8_t __noswap_vmulxq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x4_t vcvta_s16_f16(float16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvta_s16_v((int8x8_t)__p0, 1);
+  return __ret;
+}
+#else
+__ai int16x4_t vcvta_s16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvta_s16_v((int8x8_t)__rev0, 1);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+__ai uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_v((int8x16_t)__p0, 49);
   return __ret;
 }
 #else
-__ai float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_v((int8x16_t)__rev0, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint16x4_t vcvta_u16_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvta_u16_v((int8x8_t)__p0, 17);
+  return __ret;
+}
+#else
+__ai uint16x4_t vcvta_u16_f16(float16x4_t __p0) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmulx_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvta_u16_v((int8x8_t)__rev0, 17);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
-__ai float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) {
-  float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x8_t vcvtmq_s16_f16(float16x8_t __p0) {
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_v((int8x16_t)__p0, 33);
+  return __ret;
+}
+#else
+__ai int16x8_t vcvtmq_s16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_v((int8x16_t)__rev0, 33);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (int8x8_t)__s1, __p2); \
-  __ret; \
-})
+__ai int16x4_t vcvtm_s16_f16(float16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvtm_s16_v((int8x8_t)__p0, 1);
+  return __ret;
+}
 #else
-#define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (int8x8_t)__rev1, __p2); \
-  __ret; \
-})
+__ai int16x4_t vcvtm_s16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvtm_s16_v((int8x8_t)__rev0, 1);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulxq_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x8_t __ret; \
-  __ret = vmulxq_f16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \
-  __ret; \
-})
+__ai uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_v((int8x16_t)__p0, 49);
+  return __ret;
+}
 #else
-#define vmulxq_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  float16x8_t __ret; \
-  __ret = __noswap_vmulxq_f16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_v((int8x16_t)__rev0, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulx_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x4_t __ret; \
-  __ret = vmulx_f16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
-  __ret; \
-})
+__ai uint16x4_t vcvtm_u16_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvtm_u16_v((int8x8_t)__p0, 17);
+  return __ret;
+}
+#else
+__ai uint16x4_t vcvtm_u16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvtm_u16_v((int8x8_t)__rev0, 17);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x8_t vcvtnq_s16_f16(float16x8_t __p0) {
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_v((int8x16_t)__p0, 33);
+  return __ret;
+}
+#else
+__ai int16x8_t vcvtnq_s16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_v((int8x16_t)__rev0, 33);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x4_t vcvtn_s16_f16(float16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvtn_s16_v((int8x8_t)__p0, 1);
+  return __ret;
+}
+#else
+__ai int16x4_t vcvtn_s16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvtn_s16_v((int8x8_t)__rev0, 1);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_v((int8x16_t)__p0, 49);
+  return __ret;
+}
+#else
+__ai uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_v((int8x16_t)__rev0, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint16x4_t vcvtn_u16_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_v((int8x8_t)__p0, 17);
+  return __ret;
+}
+#else
+__ai uint16x4_t vcvtn_u16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_v((int8x8_t)__rev0, 17);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x8_t vcvtpq_s16_f16(float16x8_t __p0) {
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_v((int8x16_t)__p0, 33);
+  return __ret;
+}
 #else
-#define vmulx_lane_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x4_t __s1 = __p1; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  float16x4_t __ret; \
-  __ret = __noswap_vmulx_f16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai int16x8_t vcvtpq_s16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_v((int8x16_t)__rev0, 33);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (int8x16_t)__s1, __p2); \
-  __ret; \
-})
+__ai int16x4_t vcvtp_s16_f16(float16x4_t __p0) {
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvtp_s16_v((int8x8_t)__p0, 1);
+  return __ret;
+}
 #else
-#define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (int8x16_t)__rev1, __p2); \
-  __ret; \
-})
+__ai int16x4_t vcvtp_s16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int16x4_t __ret;
+  __ret = (int16x4_t) __builtin_neon_vcvtp_s16_v((int8x8_t)__rev0, 1);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulxq_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x8_t __ret; \
-  __ret = vmulxq_f16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \
-  __ret; \
-})
+__ai uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_v((int8x16_t)__p0, 49);
+  return __ret;
+}
 #else
-#define vmulxq_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x8_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x8_t __ret; \
-  __ret = __noswap_vmulxq_f16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_v((int8x16_t)__rev0, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulx_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x4_t __ret; \
-  __ret = vmulx_f16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
-  __ret; \
-})
+__ai uint16x4_t vcvtp_u16_f16(float16x4_t __p0) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_v((int8x8_t)__p0, 17);
+  return __ret;
+}
 #else
-#define vmulx_laneq_f16(__p0, __p1, __p2) __extension__ ({ \
-  float16x4_t __s0 = __p0; \
-  float16x8_t __s1 = __p1; \
-  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
-  float16x4_t __ret; \
-  __ret = __noswap_vmulx_f16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai uint16x4_t vcvtp_u16_f16(float16x4_t __p0) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_v((int8x8_t)__rev0, 17);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulxq_n_f16(__p0, __p1) __extension__ ({ \
+#define vextq_f16(__p0, __p1, __p2) __extension__ ({ \
   float16x8_t __s0 = __p0; \
-  float16_t __s1 = __p1; \
+  float16x8_t __s1 = __p1; \
   float16x8_t __ret; \
-  __ret = vmulxq_f16(__s0, (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}); \
+  __ret = (float16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 40); \
   __ret; \
 })
 #else
-#define vmulxq_n_f16(__p0, __p1) __extension__ ({ \
+#define vextq_f16(__p0, __p1, __p2) __extension__ ({ \
   float16x8_t __s0 = __p0; \
-  float16_t __s1 = __p1; \
+  float16x8_t __s1 = __p1; \
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   float16x8_t __ret; \
-  __ret = __noswap_vmulxq_f16(__rev0, (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}); \
+  __ret = (float16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 40); \
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vmulx_n_f16(__p0, __p1) __extension__ ({ \
+#define vext_f16(__p0, __p1, __p2) __extension__ ({ \
   float16x4_t __s0 = __p0; \
-  float16_t __s1 = __p1; \
+  float16x4_t __s1 = __p1; \
   float16x4_t __ret; \
-  __ret = vmulx_f16(__s0, (float16x4_t) {__s1, __s1, __s1, __s1}); \
+  __ret = (float16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 8); \
   __ret; \
 })
 #else
-#define vmulx_n_f16(__p0, __p1) __extension__ ({ \
+#define vext_f16(__p0, __p1, __p2) __extension__ ({ \
   float16x4_t __s0 = __p0; \
-  float16_t __s1 = __p1; \
+  float16x4_t __s1 = __p1; \
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   float16x4_t __ret; \
-  __ret = __noswap_vmulx_f16(__rev0, (float16x4_t) {__s1, __s1, __s1, __s1}); \
+  __ret = (float16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 8); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vpaddq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+  __ret = (float16x8_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40);
   return __ret;
 }
 #else
-__ai float16x8_t vpaddq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  __ret = (float16x8_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
+__ai float16x8_t __noswap_vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
+  float16x8_t __ret;
+  __ret = (float16x8_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vpmaxq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+__ai float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8);
   return __ret;
 }
 #else
-__ai float16x8_t vpmaxq_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vfma_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+__ai float16x4_t __noswap_vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vpmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x8_t vfmsq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+  __ret = vfmaq_f16(__p0, -__p1, __p2);
   return __ret;
 }
 #else
-__ai float16x8_t vpmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x8_t vfmsq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  __ret = __noswap_vfmaq_f16(__rev0, -__rev1, __rev2);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vpmaxnm_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vfms_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  __ret = vfma_f16(__p0, -__p1, __p2);
   return __ret;
 }
 #else
-__ai float16x4_t vpmaxnm_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vfms_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = __noswap_vfma_f16(__rev0, -__rev1, __rev2);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vpminq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+  __ret = (float16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
   return __ret;
 }
 #else
-__ai float16x8_t vpminq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  __ret = (float16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vpminnmq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) {
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  return __ret;
+}
+#else
+__ai float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float16x8_t vminq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+  __ret = (float16x8_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
   return __ret;
 }
 #else
-__ai float16x8_t vpminnmq_f16(float16x8_t __p0, float16x8_t __p1) {
+__ai float16x8_t vminq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  __ret = (float16x8_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vpminnm_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  __ret = (float16x4_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
   return __ret;
 }
 #else
-__ai float16x4_t vpminnm_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vpminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = (float16x4_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vrndiq_f16(float16x8_t __p0) {
+__ai float16x8_t vmulq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 40);
+  __ret = __p0 * __p1;
   return __ret;
 }
 #else
-__ai float16x8_t vrndiq_f16(float16x8_t __p0) {
+__ai float16x8_t vmulq_f16(float16x8_t __p0, float16x8_t __p1) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 40);
+  __ret = __rev0 * __rev1;
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vrndi_f16(float16x4_t __p0) {
+__ai float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 8);
+  __ret = __p0 * __p1;
   return __ret;
 }
 #else
-__ai float16x4_t vrndi_f16(float16x4_t __p0) {
+__ai float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vrndi_v((int8x8_t)__rev0, 8);
+  __ret = __rev0 * __rev1;
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vsqrtq_f16(float16x8_t __p0) {
+#define vmulq_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+  float16x8_t __s0 = __p0; \
+  float16x4_t __s1 = __p1; \
+  float16x8_t __ret; \
+  __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \
+  __ret; \
+})
+#else
+#define vmulq_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+  float16x8_t __s0 = __p0; \
+  float16x4_t __s1 = __p1; \
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  float16x8_t __ret; \
+  __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmul_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+  float16x4_t __s0 = __p0; \
+  float16x4_t __s1 = __p1; \
+  float16x4_t __ret; \
+  __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \
+  __ret; \
+})
+#else
+#define vmul_lane_f16(__p0, __p1, __p2) __extension__ ({ \
+  float16x4_t __s0 = __p0; \
+  float16x4_t __s1 = __p1; \
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  float16x4_t __ret; \
+  __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmulq_n_f16(__p0, __p1) __extension__ ({ \
+  float16x8_t __s0 = __p0; \
+  float16_t __s1 = __p1; \
+  float16x8_t __ret; \
+  __ret = __s0 * (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}; \
+  __ret; \
+})
+#else
+#define vmulq_n_f16(__p0, __p1) __extension__ ({ \
+  float16x8_t __s0 = __p0; \
+  float16_t __s1 = __p1; \
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  float16x8_t __ret; \
+  __ret = __rev0 * (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}; \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmul_n_f16(__p0, __p1) __extension__ ({ \
+  float16x4_t __s0 = __p0; \
+  float16_t __s1 = __p1; \
+  float16x4_t __ret; \
+  __ret = __s0 * (float16x4_t) {__s1, __s1, __s1, __s1}; \
+  __ret; \
+})
+#else
+#define vmul_n_f16(__p0, __p1) __extension__ ({ \
+  float16x4_t __s0 = __p0; \
+  float16_t __s1 = __p1; \
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  float16x4_t __ret; \
+  __ret = __rev0 * (float16x4_t) {__s1, __s1, __s1, __s1}; \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float16x8_t vnegq_f16(float16x8_t __p0) {
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 40);
+  __ret = -__p0;
   return __ret;
 }
 #else
-__ai float16x8_t vsqrtq_f16(float16x8_t __p0) {
+__ai float16x8_t vnegq_f16(float16x8_t __p0) {
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   float16x8_t __ret;
-  __ret = (float16x8_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 40);
+  __ret = -__rev0;
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vsqrt_f16(float16x4_t __p0) {
+__ai float16x4_t vneg_f16(float16x4_t __p0) {
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 8);
+  __ret = -__p0;
   return __ret;
 }
 #else
-__ai float16x4_t vsqrt_f16(float16x4_t __p0) {
+__ai float16x4_t vneg_f16(float16x4_t __p0) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = (float16x4_t) __builtin_neon_vsqrt_v((int8x8_t)__rev0, 8);
+  __ret = -__rev0;
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x8_t vtrn1q_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __ret;
-  __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14);
+__ai float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) {
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
   return __ret;
 }
 #else
-__ai float16x8_t vtrn1q_f16(float16x8_t __p0, float16x8_t __p1) {
-  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  float16x8_t __ret;
-  __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vtrn1_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __ret;
-  __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6);
+  __ret = (float16x4_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
   return __ret;
 }
 #else
-__ai float16x4_t vtrn1_f16(float16x4_t __p0, float16x4_t __p1) {
+__ai float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   float16x4_t __ret;
-  __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6);
+  __ret = (float16x4_t)
__builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vtrn2q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 8); + return __ret; +} +#else +__ai float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float16x8_t vrecpeq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); + __ret = (float16x8_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 40); return __ret; } #else -__ai float16x8_t vtrn2q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrecpeq_f16(float16x8_t __p0) { float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); + __ret = (float16x8_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vtrn2_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrecpe_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); + __ret = (float16x4_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 8); return __ret; } #else -__ai float16x4_t vtrn2_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrecpe_f16(float16x4_t __p0) { float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); + __ret = (float16x4_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vuzp1q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); + __ret = (float16x8_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai float16x8_t vuzp1q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); + __ret = (float16x8_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vuzp1_f16(float16x4_t __p0, float16x4_t __p1) { +__ai 
float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); + __ret = (float16x4_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else -__ai float16x4_t vuzp1_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); + __ret = (float16x4_t) __builtin_neon_vrecps_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vuzp2q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrev64q_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); + __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } #else -__ai float16x8_t vuzp2q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrev64q_f16(float16x8_t __p0) { float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); + __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vuzp2_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrev64_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); + __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } #else -__ai float16x4_t vuzp2_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrev64_f16(float16x4_t __p0) { float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); + __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vzip1q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrsqrteq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); + __ret = (float16x8_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 40); return __ret; } #else -__ai float16x8_t vzip1q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrsqrteq_f16(float16x8_t __p0) { float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); + __ret = (float16x8_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vzip1_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrsqrte_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = 
__builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); + __ret = (float16x4_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 8); return __ret; } #else -__ai float16x4_t vzip1_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrsqrte_f16(float16x4_t __p0) { float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); + __ret = (float16x4_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vzip2q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrsqrtsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); + __ret = (float16x8_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai float16x8_t vzip2q_f16(float16x8_t __p0, float16x8_t __p1) { +__ai float16x8_t vrsqrtsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); + __ret = (float16x8_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x4_t vzip2_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); + __ret = (float16x4_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else -__ai float16x4_t vzip2_f16(float16x4_t __p0, float16x4_t __p1) { +__ai float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - -#endif -#if defined(__ARM_FEATURE_QRDMX) -#ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { - int32x4_t __ret; - __ret = vqaddq_s32(__p0, vqrdmulhq_s32(__p1, __p2)); - return __ret; -} -#else -__ai int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - int32x4_t __ret; - __ret = __noswap_vqaddq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __rev2)); + __ret = (float16x4_t) __builtin_neon_vrsqrts_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { - int16x8_t __ret; - __ret = vqaddq_s16(__p0, vqrdmulhq_s16(__p1, __p2)); +__ai float16x8_t vsubq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __p0 - __p1; return 
__ret; } #else -__ai int16x8_t vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret; - __ret = __noswap_vqaddq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __rev2)); +__ai float16x8_t vsubq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { - int32x2_t __ret; - __ret = vqadd_s32(__p0, vqrdmulh_s32(__p1, __p2)); +__ai float16x4_t vsub_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __p0 - __p1; return __ret; } #else -__ai int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - int32x2_t __ret; - __ret = __noswap_vqadd_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float16x4_t vsub_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = __rev0 - __rev1; + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { - int16x4_t __ret; - __ret = vqadd_s16(__p0, vqrdmulh_s16(__p1, __p2)); +__ai float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8x2_t __ret; + __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - int16x4_t __ret; - __ret = __noswap_vqadd_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8x2_t __ret; + __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int32x4_t 
__ret; \ - __ret = vqaddq_s32(__s0, vqrdmulhq_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ - __ret; \ -}) +__ai float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4x2_t __ret; + __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); + return __ret; +} #else -#define vqrdmlahq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqaddq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif +__ai float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4x2_t __ret; + __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x8_t __ret; \ - __ret = vqaddq_s16(__s0, vqrdmulhq_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ - __ret; \ -}) -#else -#define vqrdmlahq_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = __noswap_vqaddq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int32x2_t __ret; \ - __ret = vqadd_s32(__s0, vqrdmulh_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3))); \ - __ret; \ -}) +__ai float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8x2_t __ret; + __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); + return __ret; +} #else -#define vqrdmlah_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - int32x2_t __ret; \ - __ret = __noswap_vqadd_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3))); \ - __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif +__ai float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8x2_t __ret; + __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x4_t __ret; \ - __ret = vqadd_s16(__s0, vqrdmulh_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ - __ret; \ -}) -#else -#define vqrdmlah_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = __noswap_vqadd_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { - int32x4_t __ret; - __ret = vqsubq_s32(__p0, vqrdmulhq_s32(__p1, __p2)); +__ai float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4x2_t __ret; + __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else -__ai int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - int32x4_t __ret; - __ret = __noswap_vqsubq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4x2_t __ret; + __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { - int16x8_t __ret; - __ret = vqsubq_s16(__p0, vqrdmulhq_s16(__p1, __p2)); +__ai float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8x2_t __ret; + __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 
0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret; - __ret = __noswap_vqsubq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8x2_t __ret; + __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { - int32x2_t __ret; - __ret = vqsub_s32(__p0, vqrdmulh_s32(__p1, __p2)); +__ai float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4x2_t __ret; + __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else -__ai int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - int32x2_t __ret; - __ret = __noswap_vqsub_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4x2_t __ret; + __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif +#endif +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { - int16x4_t __ret; - __ret = vqsub_s16(__p0, vqrdmulh_s16(__p1, __p2)); +__ai float16x8_t vdivq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __p0 / __p1; return __ret; } #else -__ai int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - int16x4_t __ret; - __ret = __noswap_vqsub_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float16x8_t vdivq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __rev0 / __rev1; + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - 
int32x2_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = vqsubq_s32(__s0, vqrdmulhq_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ - __ret; \ -}) +__ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __p0 / __p1; + return __ret; +} #else -#define vqrdmlshq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqsubq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = __rev0 / __rev1; + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x8_t __ret; \ - __ret = vqsubq_s16(__s0, vqrdmulhq_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ +#define vduph_lane_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vduph_lane_f16((int8x8_t)__s0, __p1); \ __ret; \ }) #else -#define vqrdmlshq_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = __noswap_vqsubq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vduph_lane_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vduph_lane_f16((int8x8_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int32x2_t __ret; \ - __ret = vqsub_s32(__s0, vqrdmulh_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3))); \ +#define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vduph_laneq_f16((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vqrdmlsh_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = 
__builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - int32x2_t __ret; \ - __ret = __noswap_vqsub_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vduph_laneq_f16((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x4_t __ret; \ - __ret = vqsub_s16(__s0, vqrdmulh_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ +#define vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__s2, __p3); \ __ret; \ }) #else -#define vqrdmlsh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = __noswap_vqsub_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#endif -#if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = vqaddq_s32(__s0, vqrdmulhq_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ +#define vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__rev2, __p3); \ __ret; \ }) -#else -#define vqrdmlahq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqaddq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define __noswap_vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_laneq_s16(__p0, __p1, 
__p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x8_t __ret; \ - __ret = vqaddq_s16(__s0, vqrdmulhq_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ +#define vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 40); \ __ret; \ }) #else -#define vqrdmlahq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = __noswap_vqaddq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ +#define vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, __p3, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x2_t __ret; \ - __ret = vqadd_s32(__s0, vqrdmulh_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3))); \ - __ret; \ -}) -#else -#define vqrdmlah_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x2_t __ret; \ - __ret = __noswap_vqadd_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define __noswap_vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 40); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x4_t __ret; \ - __ret = vqadd_s16(__s0, vqrdmulh_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ +#define vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16x4_t 
__ret; \ + __ret = (float16x4_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 8); \ __ret; \ }) #else -#define vqrdmlah_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = __noswap_vqadd_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ +#define vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vfma_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, __p3, 8); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x4_t __s2 = __p2; \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 8); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = vqsubq_s32(__s0, vqrdmulhq_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ +#define vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x8_t __s2 = __p2; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__s2, __p3); \ __ret; \ }) #else -#define vqrdmlshq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqsubq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x8_t __s2 = __p2; \ + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret; \ +}) +#define __noswap_vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x8_t __s2 = __p2; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vqrdmlshq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x8_t __ret; \ - __ret = vqsubq_s16(__s0, vqrdmulhq_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ +#define vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __s2 = __p2; \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 40); \ __ret; \ }) #else -#define vqrdmlshq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = __noswap_vqsubq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ +#define vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __s2 = __p2; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) +#define __noswap_vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __s2 = __p2; \ + float16x8_t __ret; \ + __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 40); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x2_t __ret; \ - __ret = vqsub_s32(__s0, vqrdmulh_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3))); \ +#define vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x8_t __s2 = __p2; \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 8); \ __ret; \ }) #else -#define vqrdmlsh_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x2_t __ret; \ - __ret = __noswap_vqsub_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x8_t __s2 = 
__p2; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vfma_laneq_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x16_t)__rev2, __p3, 8); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x8_t __s2 = __p2; \ + float16x4_t __ret; \ + __ret = (float16x4_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 8); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x4_t __ret; \ - __ret = vqsub_s16(__s0, vqrdmulh_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ +#define vfmaq_n_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16_t __s2 = __p2; \ + float16x8_t __ret; \ + __ret = vfmaq_f16(__s0, __s1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ __ret; \ }) #else -#define vqrdmlsh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = __noswap_vqsub_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vfmaq_n_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16_t __s2 = __p2; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = __noswap_vfmaq_f16(__rev0, __rev1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif -#endif -#if defined(__aarch64__) -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); - return __ret; -} -#else -__ai float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vabd_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 10); - return __ret; -} -#else -__ai float64x1_t vabd_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; 
- __ret = (float64x1_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 10); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64_t vabdd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vabdd_f64(__p0, __p1); - return __ret; -} -#else -__ai float64_t vabdd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vabdd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float32_t vabds_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vabds_f32(__p0, __p1); - return __ret; -} -#else -__ai float32_t vabds_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vabds_f32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vabsq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 42); - return __ret; -} -#else -__ai float64x2_t vabsq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vabsq_s64(int64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 35); - return __ret; -} -#else -__ai int64x2_t vabsq_s64(int64x2_t __p0) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vabs_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 10); - return __ret; -} -#else -__ai float64x1_t vabs_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 10); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vabs_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 3); - return __ret; -} +#define vfma_n_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16_t __s2 = __p2; \ + float16x4_t __ret; \ + __ret = vfma_f16(__s0, __s1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ + __ret; \ +}) #else -__ai int64x1_t vabs_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 3); - return __ret; -} +#define vfma_n_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16_t __s2 = __p2; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = __noswap_vfma_f16(__rev0, __rev1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vabsd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vabsd_s64(__p0); - return __ret; -} +#define vfmsh_lane_f16(__p0_0, __p1_0, __p2_0, __p3_0) __extension__ ({ \ + float16_t __s0_0 = __p0_0; \ + float16_t __s1_0 = __p1_0; \ + float16x4_t __s2_0 = __p2_0; \ + float16_t 
__ret_0; \ + __ret_0 = vfmah_lane_f16(__s0_0, -__s1_0, __s2_0, __p3_0); \ + __ret_0; \ +}) #else -__ai int64_t vabsd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vabsd_s64(__p0); - return __ret; -} +#define vfmsh_lane_f16(__p0_1, __p1_1, __p2_1, __p3_1) __extension__ ({ \ + float16_t __s0_1 = __p0_1; \ + float16_t __s1_1 = __p1_1; \ + float16x4_t __s2_1 = __p2_1; \ + float16x4_t __rev2_1; __rev2_1 = __builtin_shufflevector(__s2_1, __s2_1, 3, 2, 1, 0); \ + float16_t __ret_1; \ + __ret_1 = __noswap_vfmah_lane_f16(__s0_1, -__s1_1, __rev2_1, __p3_1); \ + __ret_1; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = __p0 + __p1; - return __ret; -} +#define vfmsq_lane_f16(__p0_2, __p1_2, __p2_2, __p3_2) __extension__ ({ \ + float16x8_t __s0_2 = __p0_2; \ + float16x8_t __s1_2 = __p1_2; \ + float16x4_t __s2_2 = __p2_2; \ + float16x8_t __ret_2; \ + __ret_2 = vfmaq_lane_f16(__s0_2, -__s1_2, __s2_2, __p3_2); \ + __ret_2; \ +}) #else -__ai float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vfmsq_lane_f16(__p0_3, __p1_3, __p2_3, __p3_3) __extension__ ({ \ + float16x8_t __s0_3 = __p0_3; \ + float16x8_t __s1_3 = __p1_3; \ + float16x4_t __s2_3 = __p2_3; \ + float16x8_t __rev0_3; __rev0_3 = __builtin_shufflevector(__s0_3, __s0_3, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_3; __rev1_3 = __builtin_shufflevector(__s1_3, __s1_3, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_3; __rev2_3 = __builtin_shufflevector(__s2_3, __s2_3, 3, 2, 1, 0); \ + float16x8_t __ret_3; \ + __ret_3 = __noswap_vfmaq_lane_f16(__rev0_3, -__rev1_3, __rev2_3, __p3_3); \ + __ret_3 = __builtin_shufflevector(__ret_3, __ret_3, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_3; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vadd_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 + __p1; - return __ret; -} +#define vfms_lane_f16(__p0_4, __p1_4, __p2_4, __p3_4) __extension__ ({ \ + float16x4_t __s0_4 = __p0_4; \ + float16x4_t __s1_4 = __p1_4; \ + float16x4_t __s2_4 = __p2_4; \ + float16x4_t __ret_4; \ + __ret_4 = vfma_lane_f16(__s0_4, -__s1_4, __s2_4, __p3_4); \ + __ret_4; \ +}) #else -__ai float64x1_t vadd_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 + __p1; - return __ret; -} +#define vfms_lane_f16(__p0_5, __p1_5, __p2_5, __p3_5) __extension__ ({ \ + float16x4_t __s0_5 = __p0_5; \ + float16x4_t __s1_5 = __p1_5; \ + float16x4_t __s2_5 = __p2_5; \ + float16x4_t __rev0_5; __rev0_5 = __builtin_shufflevector(__s0_5, __s0_5, 3, 2, 1, 0); \ + float16x4_t __rev1_5; __rev1_5 = __builtin_shufflevector(__s1_5, __s1_5, 3, 2, 1, 0); \ + float16x4_t __rev2_5; __rev2_5 = __builtin_shufflevector(__s2_5, __s2_5, 3, 2, 1, 0); \ + float16x4_t __ret_5; \ + __ret_5 = __noswap_vfma_lane_f16(__rev0_5, -__rev1_5, __rev2_5, __p3_5); \ + __ret_5 = __builtin_shufflevector(__ret_5, __ret_5, 3, 2, 1, 0); \ + __ret_5; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vaddd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddd_u64(__p0, __p1); - return __ret; -} +#define vfmsh_laneq_f16(__p0_6, __p1_6, __p2_6, __p3_6) __extension__ ({ \ + float16_t __s0_6 = __p0_6; \ + 
float16_t __s1_6 = __p1_6; \ + float16x8_t __s2_6 = __p2_6; \ + float16_t __ret_6; \ + __ret_6 = vfmah_laneq_f16(__s0_6, -__s1_6, __s2_6, __p3_6); \ + __ret_6; \ +}) #else -__ai uint64_t vaddd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddd_u64(__p0, __p1); - return __ret; -} +#define vfmsh_laneq_f16(__p0_7, __p1_7, __p2_7, __p3_7) __extension__ ({ \ + float16_t __s0_7 = __p0_7; \ + float16_t __s1_7 = __p1_7; \ + float16x8_t __s2_7 = __p2_7; \ + float16x8_t __rev2_7; __rev2_7 = __builtin_shufflevector(__s2_7, __s2_7, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret_7; \ + __ret_7 = __noswap_vfmah_laneq_f16(__s0_7, -__s1_7, __rev2_7, __p3_7); \ + __ret_7; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vaddd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1); - return __ret; -} +#define vfmsq_laneq_f16(__p0_8, __p1_8, __p2_8, __p3_8) __extension__ ({ \ + float16x8_t __s0_8 = __p0_8; \ + float16x8_t __s1_8 = __p1_8; \ + float16x8_t __s2_8 = __p2_8; \ + float16x8_t __ret_8; \ + __ret_8 = vfmaq_laneq_f16(__s0_8, -__s1_8, __s2_8, __p3_8); \ + __ret_8; \ +}) #else -__ai int64_t vaddd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1); - return __ret; -} +#define vfmsq_laneq_f16(__p0_9, __p1_9, __p2_9, __p3_9) __extension__ ({ \ + float16x8_t __s0_9 = __p0_9; \ + float16x8_t __s1_9 = __p1_9; \ + float16x8_t __s2_9 = __p2_9; \ + float16x8_t __rev0_9; __rev0_9 = __builtin_shufflevector(__s0_9, __s0_9, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_9; __rev1_9 = __builtin_shufflevector(__s1_9, __s1_9, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_9; __rev2_9 = __builtin_shufflevector(__s2_9, __s2_9, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_9; \ + __ret_9 = __noswap_vfmaq_laneq_f16(__rev0_9, -__rev1_9, __rev2_9, __p3_9); \ + __ret_9 = __builtin_shufflevector(__ret_9, __ret_9, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_9; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { - uint16x8_t __ret; - __ret = vcombine_u16(__p0, vaddhn_u32(__p1, __p2)); - return __ret; -} +#define vfms_laneq_f16(__p0_10, __p1_10, __p2_10, __p3_10) __extension__ ({ \ + float16x4_t __s0_10 = __p0_10; \ + float16x4_t __s1_10 = __p1_10; \ + float16x8_t __s2_10 = __p2_10; \ + float16x4_t __ret_10; \ + __ret_10 = vfma_laneq_f16(__s0_10, -__s1_10, __s2_10, __p3_10); \ + __ret_10; \ +}) #else -__ai uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = __noswap_vcombine_u16(__rev0, __noswap_vaddhn_u32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vfms_laneq_f16(__p0_11, __p1_11, __p2_11, __p3_11) __extension__ ({ \ + float16x4_t __s0_11 = __p0_11; \ + float16x4_t __s1_11 = __p1_11; \ + float16x8_t __s2_11 = __p2_11; \ + float16x4_t __rev0_11; __rev0_11 = __builtin_shufflevector(__s0_11, __s0_11, 3, 2, 1, 0); \ + float16x4_t __rev1_11; __rev1_11 = __builtin_shufflevector(__s1_11, __s1_11, 3, 2, 1, 0); \ + float16x8_t __rev2_11; __rev2_11 = __builtin_shufflevector(__s2_11, __s2_11, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_11; \ + __ret_11 = 
__noswap_vfma_laneq_f16(__rev0_11, -__rev1_11, __rev2_11, __p3_11); \ + __ret_11 = __builtin_shufflevector(__ret_11, __ret_11, 3, 2, 1, 0); \ + __ret_11; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vaddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { - uint32x4_t __ret; - __ret = vcombine_u32(__p0, vaddhn_u64(__p1, __p2)); - return __ret; -} +#define vfmsq_n_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16_t __s2 = __p2; \ + float16x8_t __ret; \ + __ret = vfmaq_f16(__s0, -__s1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ + __ret; \ +}) #else -__ai uint32x4_t vaddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - uint32x4_t __ret; - __ret = __noswap_vcombine_u32(__rev0, __noswap_vaddhn_u64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vfmsq_n_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16_t __s2 = __p2; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = __noswap_vfmaq_f16(__rev0, -__rev1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vaddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { - uint8x16_t __ret; - __ret = vcombine_u8(__p0, vaddhn_u16(__p1, __p2)); - return __ret; -} +#define vfms_n_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16_t __s2 = __p2; \ + float16x4_t __ret; \ + __ret = vfma_f16(__s0, -__s1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ + __ret; \ +}) #else -__ai uint8x16_t vaddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = __noswap_vcombine_u8(__rev0, __noswap_vaddhn_u16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vfms_n_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16_t __s2 = __p2; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = __noswap_vfma_f16(__rev0, -__rev1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vaddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { - int16x8_t __ret; - __ret = vcombine_s16(__p0, vaddhn_s32(__p1, __p2)); - return __ret; -} -#else -__ai int16x8_t vaddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { - int16x4_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - int16x8_t __ret; - __ret = __noswap_vcombine_s16(__rev0, __noswap_vaddhn_s32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vmaxnmvq_f16(__p0) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmaxnmvq_f16((int8x16_t)__s0); \ + __ret; \ +}) +#else +#define vmaxnmvq_f16(__p0) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmaxnmvq_f16((int8x16_t)__rev0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vaddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { - int32x4_t __ret; - __ret = vcombine_s32(__p0, vaddhn_s64(__p1, __p2)); - return __ret; -} +#define vmaxnmv_f16(__p0) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmaxnmv_f16((int8x8_t)__s0); \ + __ret; \ +}) #else -__ai int32x4_t vaddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - int32x4_t __ret; - __ret = __noswap_vcombine_s32(__rev0, __noswap_vaddhn_s64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vmaxnmv_f16(__p0) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmaxnmv_f16((int8x8_t)__rev0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vaddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { - int8x16_t __ret; - __ret = vcombine_s8(__p0, vaddhn_s16(__p1, __p2)); - return __ret; -} +#define vmaxvq_f16(__p0) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmaxvq_f16((int8x16_t)__s0); \ + __ret; \ +}) #else -__ai int8x16_t vaddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = __noswap_vcombine_s8(__rev0, __noswap_vaddhn_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vmaxvq_f16(__p0) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmaxvq_f16((int8x16_t)__rev0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vaddlvq_u8(uint8x16_t __p0) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddlvq_u8((int8x16_t)__p0); - return __ret; -} +#define vmaxv_f16(__p0) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) 
__builtin_neon_vmaxv_f16((int8x8_t)__s0); \ + __ret; \ +}) #else -__ai uint16_t vaddlvq_u8(uint8x16_t __p0) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddlvq_u8((int8x16_t)__rev0); - return __ret; -} +#define vmaxv_f16(__p0) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmaxv_f16((int8x8_t)__rev0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vaddlvq_u32(uint32x4_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddlvq_u32((int8x16_t)__p0); - return __ret; -} +#define vminnmvq_f16(__p0) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vminnmvq_f16((int8x16_t)__s0); \ + __ret; \ +}) #else -__ai uint64_t vaddlvq_u32(uint32x4_t __p0) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddlvq_u32((int8x16_t)__rev0); - return __ret; -} +#define vminnmvq_f16(__p0) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vminnmvq_f16((int8x16_t)__rev0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vaddlvq_u16(uint16x8_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddlvq_u16((int8x16_t)__p0); - return __ret; -} +#define vminnmv_f16(__p0) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vminnmv_f16((int8x8_t)__s0); \ + __ret; \ +}) #else -__ai uint32_t vaddlvq_u16(uint16x8_t __p0) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddlvq_u16((int8x16_t)__rev0); - return __ret; -} +#define vminnmv_f16(__p0) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vminnmv_f16((int8x8_t)__rev0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vaddlvq_s8(int8x16_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddlvq_s8((int8x16_t)__p0); - return __ret; -} +#define vminvq_f16(__p0) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vminvq_f16((int8x16_t)__s0); \ + __ret; \ +}) #else -__ai int16_t vaddlvq_s8(int8x16_t __p0) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddlvq_s8((int8x16_t)__rev0); - return __ret; -} +#define vminvq_f16(__p0) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vminvq_f16((int8x16_t)__rev0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vaddlvq_s32(int32x4_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddlvq_s32((int8x16_t)__p0); - return __ret; -} +#define vminv_f16(__p0) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) 
__builtin_neon_vminv_f16((int8x8_t)__s0); \ + __ret; \ +}) #else -__ai int64_t vaddlvq_s32(int32x4_t __p0) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddlvq_s32((int8x16_t)__rev0); - return __ret; -} +#define vminv_f16(__p0) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vminv_f16((int8x8_t)__rev0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vaddlvq_s16(int16x8_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddlvq_s16((int8x16_t)__p0); - return __ret; -} +#define vmulq_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret; \ +}) #else -__ai int32_t vaddlvq_s16(int16x8_t __p0) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddlvq_s16((int8x16_t)__rev0); - return __ret; -} +#define vmulq_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vaddlv_u8(uint8x8_t __p0) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddlv_u8((int8x8_t)__p0); - return __ret; -} +#define vmul_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x4_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret; \ +}) #else -__ai uint16_t vaddlv_u8(uint8x8_t __p0) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddlv_u8((int8x8_t)__rev0); - return __ret; -} +#define vmul_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vaddlv_u32(uint32x2_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddlv_u32((int8x8_t)__p0); +__ai float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai uint64_t vaddlv_u32(uint32x2_t __p0) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddlv_u32((int8x8_t)__rev0); +__ai float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +__ai float16x8_t __noswap_vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vaddlv_u16(uint16x4_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddlv_u16((int8x8_t)__p0); +__ai float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else -__ai uint32_t vaddlv_u16(uint16x4_t __p0) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddlv_u16((int8x8_t)__rev0); +__ai float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vmulx_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vaddlv_s8(int8x8_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddlv_s8((int8x8_t)__p0); - return __ret; -} +#define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) #else -__ai int16_t vaddlv_s8(int8x8_t __p0) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddlv_s8((int8x8_t)__rev0); - return __ret; -} +#define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (int8x8_t)__rev1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vaddlv_s32(int32x2_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddlv_s32((int8x8_t)__p0); - return __ret; -} +#define vmulxq_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x8_t __ret; \ + __ret = vmulxq_f16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ + __ret; \ +}) #else -__ai int64_t vaddlv_s32(int32x2_t __p0) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddlv_s32((int8x8_t)__rev0); - return __ret; -} +#define vmulxq_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 
5, 4, 3, 2, 1, 0); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = __noswap_vmulxq_f16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vaddlv_s16(int16x4_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddlv_s16((int8x8_t)__p0); - return __ret; -} +#define vmulx_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x4_t __ret; \ + __ret = vmulx_f16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret; \ +}) #else -__ai int32_t vaddlv_s16(int16x4_t __p0) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddlv_s16((int8x8_t)__rev0); - return __ret; -} +#define vmulx_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __s1 = __p1; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = __noswap_vmulx_f16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8_t vaddvq_u8(uint8x16_t __p0) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vaddvq_u8((int8x16_t)__p0); - return __ret; -} +#define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ +}) #else -__ai uint8_t vaddvq_u8(uint8x16_t __p0) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vaddvq_u8((int8x16_t)__rev0); - return __ret; -} +#define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (int8x16_t)__rev1, __p2); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vaddvq_u32(uint32x4_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddvq_u32((int8x16_t)__p0); - return __ret; -} +#define vmulxq_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __ret; \ + __ret = vmulxq_f16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ + __ret; \ +}) #else -__ai uint32_t vaddvq_u32(uint32x4_t __p0) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddvq_u32((int8x16_t)__rev0); - return __ret; -} +#define vmulxq_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = 
__noswap_vmulxq_f16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vaddvq_u64(uint64x2_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddvq_u64((int8x16_t)__p0); - return __ret; -} +#define vmulx_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x4_t __ret; \ + __ret = vmulx_f16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret; \ +}) #else -__ai uint64_t vaddvq_u64(uint64x2_t __p0) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddvq_u64((int8x16_t)__rev0); - return __ret; -} +#define vmulx_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = __noswap_vmulx_f16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vaddvq_u16(uint16x8_t __p0) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddvq_u16((int8x16_t)__p0); - return __ret; -} +#define vmulxq_n_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x8_t __ret; \ + __ret = vmulxq_f16(__s0, (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}); \ + __ret; \ +}) #else -__ai uint16_t vaddvq_u16(uint16x8_t __p0) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddvq_u16((int8x16_t)__rev0); - return __ret; -} +#define vmulxq_n_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = __noswap_vmulxq_f16(__rev0, (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vaddvq_s8(int8x16_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vaddvq_s8((int8x16_t)__p0); - return __ret; -} +#define vmulx_n_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x4_t __ret; \ + __ret = vmulx_f16(__s0, (float16x4_t) {__s1, __s1, __s1, __s1}); \ + __ret; \ +}) #else -__ai int8_t vaddvq_s8(int8x16_t __p0) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8_t __ret; - __ret = (int8_t) __builtin_neon_vaddvq_s8((int8x16_t)__rev0); - return __ret; -} +#define vmulx_n_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16_t __s1 = __p1; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = __noswap_vmulx_f16(__rev0, (float16x4_t) {__s1, __s1, __s1, __s1}); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64_t vaddvq_f64(float64x2_t __p0) { 
- float64_t __ret; - __ret = (float64_t) __builtin_neon_vaddvq_f64((int8x16_t)__p0); +__ai float16x8_t vpaddq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai float64_t vaddvq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64_t __ret; - __ret = (float64_t) __builtin_neon_vaddvq_f64((int8x16_t)__rev0); +__ai float16x8_t vpaddq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vaddvq_f32(float32x4_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vaddvq_f32((int8x16_t)__p0); +__ai float16x8_t vpmaxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai float32_t vaddvq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32_t __ret; - __ret = (float32_t) __builtin_neon_vaddvq_f32((int8x16_t)__rev0); +__ai float16x8_t vpmaxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vaddvq_s32(int32x4_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddvq_s32((int8x16_t)__p0); +__ai float16x8_t vpmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai int32_t vaddvq_s32(int32x4_t __p0) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddvq_s32((int8x16_t)__rev0); +__ai float16x8_t vpmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vaddvq_s64(int64x2_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddvq_s64((int8x16_t)__p0); +__ai float16x4_t vpmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vpmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else -__ai int64_t vaddvq_s64(int64x2_t __p0) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddvq_s64((int8x16_t)__rev0); +__ai float16x4_t 
vpmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vpmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vaddvq_s16(int16x8_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddvq_s16((int8x16_t)__p0); +__ai float16x8_t vpminq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai int16_t vaddvq_s16(int16x8_t __p0) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddvq_s16((int8x16_t)__rev0); +__ai float16x8_t vpminq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8_t vaddv_u8(uint8x8_t __p0) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vaddv_u8((int8x8_t)__p0); +__ai float16x8_t vpminnmq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else -__ai uint8_t vaddv_u8(uint8x8_t __p0) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vaddv_u8((int8x8_t)__rev0); +__ai float16x8_t vpminnmq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vaddv_u32(uint32x2_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddv_u32((int8x8_t)__p0); +__ai float16x4_t vpminnm_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vpminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else -__ai uint32_t vaddv_u32(uint32x2_t __p0) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddv_u32((int8x8_t)__rev0); +__ai float16x4_t vpminnm_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vpminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vaddv_u16(uint16x4_t __p0) { - uint16_t __ret; - __ret = (uint16_t) 
__builtin_neon_vaddv_u16((int8x8_t)__p0); +__ai float16x8_t vrndiq_f16(float16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 40); return __ret; } #else -__ai uint16_t vaddv_u16(uint16x4_t __p0) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddv_u16((int8x8_t)__rev0); +__ai float16x8_t vrndiq_f16(float16x8_t __p0) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vaddv_s8(int8x8_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vaddv_s8((int8x8_t)__p0); +__ai float16x4_t vrndi_f16(float16x4_t __p0) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 8); return __ret; } #else -__ai int8_t vaddv_s8(int8x8_t __p0) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8_t __ret; - __ret = (int8_t) __builtin_neon_vaddv_s8((int8x8_t)__rev0); +__ai float16x4_t vrndi_f16(float16x4_t __p0) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vrndi_v((int8x8_t)__rev0, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vaddv_f32(float32x2_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vaddv_f32((int8x8_t)__p0); +__ai float16x8_t vsqrtq_f16(float16x8_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 40); return __ret; } #else -__ai float32_t vaddv_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32_t __ret; - __ret = (float32_t) __builtin_neon_vaddv_f32((int8x8_t)__rev0); +__ai float16x8_t vsqrtq_f16(float16x8_t __p0) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vaddv_s32(int32x2_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddv_s32((int8x8_t)__p0); +__ai float16x4_t vsqrt_f16(float16x4_t __p0) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 8); return __ret; } #else -__ai int32_t vaddv_s32(int32x2_t __p0) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddv_s32((int8x8_t)__rev0); +__ai float16x4_t vsqrt_f16(float16x4_t __p0) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vsqrt_v((int8x8_t)__rev0, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vaddv_s16(int16x4_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddv_s16((int8x8_t)__p0); +__ai float16x8_t vtrn1q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14); return __ret; } #else -__ai int16_t 
vaddv_s16(int16x4_t __p0) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddv_s16((int8x8_t)__rev0); +__ai float16x8_t vtrn1q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x1_t vbsl_p64(uint64x1_t __p0, poly64x1_t __p1, poly64x1_t __p2) { - poly64x1_t __ret; - __ret = (poly64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 6); +__ai float16x4_t vtrn1_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6); return __ret; } #else -__ai poly64x1_t vbsl_p64(uint64x1_t __p0, poly64x1_t __p1, poly64x1_t __p2) { - poly64x1_t __ret; - __ret = (poly64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 6); +__ai float16x4_t vtrn1_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) { - poly64x2_t __ret; - __ret = (poly64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 38); +__ai float16x8_t vtrn2q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); return __ret; } #else -__ai poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - poly64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - poly64x2_t __ret; - __ret = (poly64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 38); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float16x8_t vtrn2q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); +__ai float16x4_t vtrn2_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); return __ret; } #else -__ai float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float16x4_t vtrn2_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vbsl_f64(uint64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); +__ai float16x8_t vuzp1q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); return __ret; } #else -__ai float64x1_t vbsl_f64(uint64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); +__ai float16x8_t vuzp1q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); +__ai float16x4_t vuzp1_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); return __ret; } #else -__ai uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float16x4_t vuzp1_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcage_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 19); +__ai float16x8_t vuzp2q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); return __ret; } #else -__ai uint64x1_t vcage_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 19); +__ai float16x8_t vuzp2q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcaged_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaged_f64(__p0, __p1); +__ai float16x4_t vuzp2_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); return __ret; } #else -__ai uint64_t vcaged_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaged_f64(__p0, __p1); +__ai float16x4_t vuzp2_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcages_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcages_f32(__p0, __p1); +__ai float16x8_t vzip1q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); return __ret; } #else -__ai uint32_t vcages_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcages_f32(__p0, __p1); +__ai float16x8_t vzip1q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); +__ai float16x4_t vzip1_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); return __ret; } #else -__ai uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float16x4_t vzip1_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcagt_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); +__ai float16x8_t vzip2q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t 
__ret; + __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); return __ret; } #else -__ai uint64x1_t vcagt_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); +__ai float16x8_t vzip2q_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcagtd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcagtd_f64(__p0, __p1); +__ai float16x4_t vzip2_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); return __ret; } #else -__ai uint64_t vcagtd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcagtd_f64(__p0, __p1); +__ai float16x4_t vzip2_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif +#endif +#if defined(__ARM_FEATURE_QRDMX) #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcagts_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcagts_f32(__p0, __p1); +__ai int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { + int32x4_t __ret; + __ret = vqaddq_s32(__p0, vqrdmulhq_s32(__p1, __p2)); return __ret; } #else -__ai uint32_t vcagts_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcagts_f32(__p0, __p1); +__ai int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vqaddq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); +__ai int16x8_t vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { + int16x8_t __ret; + __ret = vqaddq_s16(__p0, vqrdmulhq_s16(__p1, __p2)); return __ret; } #else -__ai uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai int16x8_t vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret; + __ret = __noswap_vqaddq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcale_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 19); +__ai int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { + int32x2_t __ret; + __ret = vqadd_s32(__p0, vqrdmulh_s32(__p1, __p2)); return __ret; } #else -__ai uint64x1_t vcale_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 19); +__ai int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x2_t __ret; + __ret = __noswap_vqadd_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcaled_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaled_f64(__p0, __p1); +__ai int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { + int16x4_t __ret; + __ret = vqadd_s16(__p0, vqrdmulh_s16(__p1, __p2)); return __ret; } #else -__ai uint64_t vcaled_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaled_f64(__p0, __p1); +__ai int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x4_t __ret; + __ret = __noswap_vqadd_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcales_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcales_f32(__p0, __p1); +#define vqrdmlahq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int32x4_t __ret; \ + __ret = vqaddq_s32(__s0, vqrdmulhq_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ + __ret; \ +}) +#else +#define vqrdmlahq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqaddq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlahq_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t 
__s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int16x8_t __ret; \ + __ret = vqaddq_s16(__s0, vqrdmulhq_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ + __ret; \ +}) +#else +#define vqrdmlahq_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = __noswap_vqaddq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlah_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int32x2_t __ret; \ + __ret = vqadd_s32(__s0, vqrdmulh_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3))); \ + __ret; \ +}) +#else +#define vqrdmlah_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + int32x2_t __ret; \ + __ret = __noswap_vqadd_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3))); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlah_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int16x4_t __ret; \ + __ret = vqadd_s16(__s0, vqrdmulh_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \ + __ret; \ +}) +#else +#define vqrdmlah_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = __noswap_vqadd_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { + int32x4_t __ret; + __ret = vqsubq_s32(__p0, vqrdmulhq_s32(__p1, __p2)); return __ret; } #else -__ai uint32_t vcales_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcales_f32(__p0, __p1); +__ai int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vqsubq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, 
__rev2));
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 51);
+__ai int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
+  int16x8_t __ret;
+  __ret = vqsubq_s16(__p0, vqrdmulhq_s16(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __ret;
+  __ret = __noswap_vqsubq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __rev2));
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vcalt_f64(float64x1_t __p0, float64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
+__ai int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) {
+  int32x2_t __ret;
+  __ret = vqsub_s32(__p0, vqrdmulh_s32(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint64x1_t vcalt_f64(float64x1_t __p0, float64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
+__ai int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) {
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+  int32x2_t __ret;
+  __ret = __noswap_vqsub_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __rev2));
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vcaltd_f64(float64_t __p0, float64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vcaltd_f64(__p0, __p1);
+__ai int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) {
+  int16x4_t __ret;
+  __ret = vqsub_s16(__p0, vqrdmulh_s16(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint64_t vcaltd_f64(float64_t __p0, float64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vcaltd_f64(__p0, __p1);
+__ai int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) {
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+  int16x4_t __ret;
+  __ret = __noswap_vqsub_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __rev2));
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32_t vcalts_f32(float32_t __p0, float32_t __p1) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vcalts_f32(__p0, __p1);
-  return __ret;
-}
+#define
vqrdmlshq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x2_t __s2 = __p2; \
+  int32x4_t __ret; \
+  __ret = vqsubq_s32(__s0, vqrdmulhq_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint32_t vcalts_f32(float32_t __p0, float32_t __p1) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vcalts_f32(__p0, __p1);
-  return __ret;
-}
+#define vqrdmlshq_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x2_t __s2 = __p2; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \
+  int32x4_t __ret; \
+  __ret = __noswap_vqsubq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vceq_p64(poly64x1_t __p0, poly64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlshq_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x4_t __s2 = __p2; \
+  int16x8_t __ret; \
+  __ret = vqsubq_s16(__s0, vqrdmulhq_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64x1_t vceq_p64(poly64x1_t __p0, poly64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlshq_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x4_t __s2 = __p2; \
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+  int16x8_t __ret; \
+  __ret = __noswap_vqsubq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlsh_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x2_t __s2 = __p2; \
+  int32x2_t __ret; \
+  __ret = vqsub_s32(__s0, vqrdmulh_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) {
-  poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__rev0 == __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
+#define vqrdmlsh_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x2_t __s2 = __p2; \
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  int32x2_t __rev2; __rev2 =
__builtin_shufflevector(__s2, __s2, 1, 0); \
+  int32x2_t __ret; \
+  __ret = __noswap_vqsub_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vceqq_u64(uint64x2_t __p0, uint64x2_t __p1) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlsh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x4_t __s2 = __p2; \
+  int16x4_t __ret; \
+  __ret = vqsub_s16(__s0, vqrdmulh_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64x2_t vceqq_u64(uint64x2_t __p0, uint64x2_t __p1) {
-  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__rev0 == __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
+#define vqrdmlsh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x4_t __s2 = __p2; \
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+  int16x4_t __ret; \
+  __ret = __noswap_vqsub_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif
+#endif
+#if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)
 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vceqq_f64(float64x2_t __p0, float64x2_t __p1) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlahq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int32x4_t __ret; \
+  __ret = vqaddq_s32(__s0, vqrdmulhq_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64x2_t vceqq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__rev0 == __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
+#define vqrdmlahq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+  __ret = __noswap_vqaddq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlahq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x8_t __s2 = __p2;
\
+  int16x8_t __ret; \
+  __ret = vqaddq_s16(__s0, vqrdmulhq_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__rev0 == __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
+#define vqrdmlahq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __ret; \
+  __ret = __noswap_vqaddq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vceq_u64(uint64x1_t __p0, uint64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlah_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int32x2_t __ret; \
+  __ret = vqadd_s32(__s0, vqrdmulh_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64x1_t vceq_u64(uint64x1_t __p0, uint64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlah_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+  int32x2_t __ret; \
+  __ret = __noswap_vqadd_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vceq_f64(float64x1_t __p0, float64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlah_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int16x4_t __ret; \
+  __ret = vqadd_s16(__s0, vqrdmulh_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64x1_t vceq_f64(float64x1_t __p0, float64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlah_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x4_t __ret; \
+  __ret = __noswap_vqadd_s16(__rev0, __noswap_vqrdmulh_s16(__rev1,
__builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vceq_s64(int64x1_t __p0, int64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlshq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int32x4_t __ret; \
+  __ret = vqsubq_s32(__s0, vqrdmulhq_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64x1_t vceq_s64(int64x1_t __p0, int64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 == __p1);
-  return __ret;
-}
+#define vqrdmlshq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+  __ret = __noswap_vqsubq_s32(__rev0, __noswap_vqrdmulhq_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1);
-  return __ret;
-}
+#define vqrdmlshq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int16x8_t __ret; \
+  __ret = vqsubq_s16(__s0, vqrdmulhq_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1);
-  return __ret;
-}
+#define vqrdmlshq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __ret; \
+  __ret = __noswap_vqsubq_s16(__rev0, __noswap_vqrdmulhq_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64_t vceqd_s64(int64_t __p0, int64_t __p1) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vceqd_s64(__p0, __p1);
-  return __ret;
-}
+#define vqrdmlsh_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int32x2_t __ret; \
+  __ret = vqsub_s32(__s0, vqrdmulh_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai int64_t vceqd_s64(int64_t __p0, int64_t __p1) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vceqd_s64(__p0, __p1);
-  return __ret;
-}
+#define vqrdmlsh_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int32x2_t __rev0;
__rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+  int32x2_t __ret; \
+  __ret = __noswap_vqsub_s32(__rev0, __noswap_vqrdmulh_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vceqd_f64(float64_t __p0, float64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vceqd_f64(__p0, __p1);
-  return __ret;
-}
+#define vqrdmlsh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int16x4_t __ret; \
+  __ret = vqsub_s16(__s0, vqrdmulh_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3))); \
+  __ret; \
+})
 #else
-__ai uint64_t vceqd_f64(float64_t __p0, float64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vceqd_f64(__p0, __p1);
-  return __ret;
-}
+#define vqrdmlsh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x4_t __ret; \
+  __ret = __noswap_vqsub_s16(__rev0, __noswap_vqrdmulh_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3))); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif
+#endif
+#if defined(__aarch64__)
 #ifdef __LITTLE_ENDIAN__
-__ai uint32_t vceqs_f32(float32_t __p0, float32_t __p1) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vceqs_f32(__p0, __p1);
+__ai float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
   return __ret;
 }
 #else
-__ai uint32_t vceqs_f32(float32_t __p0, float32_t __p1) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vceqs_f32(__p0, __p1);
+__ai float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vceqz_p8(poly8x8_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16);
+__ai float64x1_t vabd_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
 #else
-__ai uint8x8_t vceqz_p8(poly8x8_t __p0) {
-  poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x1_t vabd_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vceqz_p64(poly64x1_t __p0) {
-
uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19);
+__ai float64_t vabdd_f64(float64_t __p0, float64_t __p1) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vabdd_f64(__p0, __p1);
   return __ret;
 }
 #else
-__ai uint64x1_t vceqz_p64(poly64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19);
+__ai float64_t vabdd_f64(float64_t __p0, float64_t __p1) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vabdd_f64(__p0, __p1);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vceqz_p16(poly16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17);
+__ai float32_t vabds_f32(float32_t __p0, float32_t __p1) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vabds_f32(__p0, __p1);
   return __ret;
 }
 #else
-__ai uint16x4_t vceqz_p16(poly16x4_t __p0) {
-  poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float32_t vabds_f32(float32_t __p0, float32_t __p1) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vabds_f32(__p0, __p1);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vceqzq_p8(poly8x16_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48);
+__ai float64x2_t vabsq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 42);
   return __ret;
 }
 #else
-__ai uint8x16_t vceqzq_p8(poly8x16_t __p0) {
-  poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x2_t vabsq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vceqzq_p64(poly64x2_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51);
+__ai int64x2_t vabsq_s64(int64x2_t __p0) {
+  int64x2_t __ret;
+  __ret = (int64x2_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 35);
   return __ret;
 }
 #else
-__ai uint64x2_t vceqzq_p64(poly64x2_t __p0) {
-  poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51);
+__ai int64x2_t vabsq_s64(int64x2_t __p0) {
+  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int64x2_t __ret;
+  __ret = (int64x2_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 35);
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vceqzq_p16(poly16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49);
+__ai float64x1_t vabs_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #else
-__ai uint16x8_t vceqzq_p16(poly16x8_t __p0) {
-  poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-
__ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x1_t vabs_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 10);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vceqzq_u8(uint8x16_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48);
+__ai int64x1_t vabs_s64(int64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 3);
   return __ret;
 }
 #else
-__ai uint8x16_t vceqzq_u8(uint8x16_t __p0) {
-  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64x1_t vabs_s64(int64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = (int64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 3);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vceqzq_u32(uint32x4_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50);
+__ai int64_t vabsd_s64(int64_t __p0) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vabsd_s64(__p0);
   return __ret;
 }
 #else
-__ai uint32x4_t vceqzq_u32(uint32x4_t __p0) {
-  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int64_t vabsd_s64(int64_t __p0) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vabsd_s64(__p0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vceqzq_u64(uint64x2_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51);
+__ai float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = __p0 + __p1;
   return __ret;
 }
 #else
-__ai uint64x2_t vceqzq_u64(uint64x2_t __p0) {
-  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51);
+__ai float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = __rev0 + __rev1;
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vceqzq_u16(uint16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49);
+__ai float64x1_t vadd_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = __p0 + __p1;
   return __ret;
 }
 #else
-__ai uint16x8_t vceqzq_u16(uint16x8_t __p0) {
-  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x1_t vadd_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = __p0 + __p1;
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vceqzq_s8(int8x16_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48);
+__ai
uint64_t vaddd_u64(uint64_t __p0, uint64_t __p1) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vaddd_u64(__p0, __p1);
   return __ret;
 }
 #else
-__ai uint8x16_t vceqzq_s8(int8x16_t __p0) {
-  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint64_t vaddd_u64(uint64_t __p0, uint64_t __p1) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vaddd_u64(__p0, __p1);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vceqzq_f64(float64x2_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51);
+__ai int64_t vaddd_s64(int64_t __p0, int64_t __p1) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1);
   return __ret;
 }
 #else
-__ai uint64x2_t vceqzq_f64(float64x2_t __p0) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int64_t vaddd_s64(int64_t __p0, int64_t __p1) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vceqzq_f32(float32x4_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50);
+__ai uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+  uint16x8_t __ret;
+  __ret = vcombine_u16(__p0, vaddhn_u32(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint32x4_t vceqzq_f32(float32x4_t __p0) {
-  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = __noswap_vcombine_u16(__rev0, __noswap_vaddhn_u32(__rev1, __rev2));
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vceqzq_s32(int32x4_t __p0) {
+__ai uint32x4_t vaddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
   uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50);
+  __ret = vcombine_u32(__p0, vaddhn_u64(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint32x4_t vceqzq_s32(int32x4_t __p0) {
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+__ai uint32x4_t vaddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
   uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50);
+  __ret = __noswap_vcombine_u32(__rev0, __noswap_vaddhn_u64(__rev1, __rev2));
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef
__LITTLE_ENDIAN__
-__ai uint64x2_t vceqzq_s64(int64x2_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51);
+__ai uint8x16_t vaddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
+  uint8x16_t __ret;
+  __ret = vcombine_u8(__p0, vaddhn_u16(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint64x2_t vceqzq_s64(int64x2_t __p0) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai uint8x16_t vaddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
+  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __ret;
+  __ret = __noswap_vcombine_u8(__rev0, __noswap_vaddhn_u16(__rev1, __rev2));
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vceqzq_s16(int16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49);
+__ai int16x8_t vaddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
+  int16x8_t __ret;
+  __ret = vcombine_s16(__p0, vaddhn_s32(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint16x8_t vceqzq_s16(int16x8_t __p0) {
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49);
+__ai int16x8_t vaddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+  int16x8_t __ret;
+  __ret = __noswap_vcombine_s16(__rev0, __noswap_vaddhn_s32(__rev1, __rev2));
   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vceqz_u8(uint8x8_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16);
+__ai int32x4_t vaddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
+  int32x4_t __ret;
+  __ret = vcombine_s32(__p0, vaddhn_s64(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint8x8_t vceqz_u8(uint8x8_t __p0) {
-  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x4_t vaddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+  int32x4_t __ret;
+  __ret = __noswap_vcombine_s32(__rev0, __noswap_vaddhn_s64(__rev1, __rev2));
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vceqz_u32(uint32x2_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18);
+__ai int8x16_t vaddhn_high_s16(int8x8_t __p0,
int16x8_t __p1, int16x8_t __p2) {
+  int8x16_t __ret;
+  __ret = vcombine_s8(__p0, vaddhn_s16(__p1, __p2));
   return __ret;
 }
 #else
-__ai uint32x2_t vceqz_u32(uint32x2_t __p0) {
-  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int8x16_t vaddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
+  int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8x16_t __ret;
+  __ret = __noswap_vcombine_s8(__rev0, __noswap_vaddhn_s16(__rev1, __rev2));
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vceqz_u64(uint64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19);
+__ai uint16_t vaddlvq_u8(uint8x16_t __p0) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vaddlvq_u8((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x1_t vceqz_u64(uint64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19);
+__ai uint16_t vaddlvq_u8(uint8x16_t __p0) {
+  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vaddlvq_u8((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vceqz_u16(uint16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17);
+__ai uint64_t vaddlvq_u32(uint32x4_t __p0) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vaddlvq_u32((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vceqz_u16(uint16x4_t __p0) {
-  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai uint64_t vaddlvq_u32(uint32x4_t __p0) {
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vaddlvq_u32((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vceqz_s8(int8x8_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16);
+__ai uint32_t vaddlvq_u16(uint16x8_t __p0) {
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vaddlvq_u16((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint8x8_t vceqz_s8(int8x8_t __p0) {
-  int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint32_t vaddlvq_u16(uint16x8_t __p0) {
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vaddlvq_u16((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vceqz_f64(float64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19);
+__ai int16_t vaddlvq_s8(int8x16_t __p0) {
+  int16_t
__ret;
+  __ret = (int16_t) __builtin_neon_vaddlvq_s8((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x1_t vceqz_f64(float64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19);
+__ai int16_t vaddlvq_s8(int8x16_t __p0) {
+  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vaddlvq_s8((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vceqz_f32(float32x2_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18);
+__ai int64_t vaddlvq_s32(int32x4_t __p0) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vaddlvq_s32((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint32x2_t vceqz_f32(float32x2_t __p0) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int64_t vaddlvq_s32(int32x4_t __p0) {
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vaddlvq_s32((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vceqz_s32(int32x2_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18);
+__ai int32_t vaddlvq_s16(int16x8_t __p0) {
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vaddlvq_s16((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint32x2_t vceqz_s32(int32x2_t __p0) {
-  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int32_t vaddlvq_s16(int16x8_t __p0) {
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vaddlvq_s16((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vceqz_s64(int64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19);
+__ai uint16_t vaddlv_u8(uint8x8_t __p0) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vaddlv_u8((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x1_t vceqz_s64(int64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19);
+__ai uint16_t vaddlv_u8(uint8x8_t __p0) {
+  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vaddlv_u8((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vceqz_s16(int16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17);
+__ai uint64_t vaddlv_u32(uint32x2_t __p0) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vaddlv_u32((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint16x4_t vceqz_s16(int16x4_t __p0) {
-  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint16x4_t __ret;
-  __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai uint64_t vaddlv_u32(uint32x2_t __p0) {
+  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  uint64_t __ret;
+  __ret = (uint64_t)
__builtin_neon_vaddlv_u32((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vceqzd_u64(uint64_t __p0) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0);
+__ai uint32_t vaddlv_u16(uint16x4_t __p0) {
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vaddlv_u16((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint64_t vceqzd_u64(uint64_t __p0) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0);
+__ai uint32_t vaddlv_u16(uint16x4_t __p0) {
+  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vaddlv_u16((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64_t vceqzd_s64(int64_t __p0) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vceqzd_s64(__p0);
+__ai int16_t vaddlv_s8(int8x8_t __p0) {
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vaddlv_s8((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai int64_t vceqzd_s64(int64_t __p0) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vceqzd_s64(__p0);
+__ai int16_t vaddlv_s8(int8x8_t __p0) {
+  int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vaddlv_s8((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vceqzd_f64(float64_t __p0) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vceqzd_f64(__p0);
+__ai int64_t vaddlv_s32(int32x2_t __p0) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vaddlv_s32((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint64_t vceqzd_f64(float64_t __p0) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vceqzd_f64(__p0);
+__ai int64_t vaddlv_s32(int32x2_t __p0) {
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vaddlv_s32((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32_t vceqzs_f32(float32_t __p0) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vceqzs_f32(__p0);
+__ai int32_t vaddlv_s16(int16x4_t __p0) {
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vaddlv_s16((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint32_t vceqzs_f32(float32_t __p0) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vceqzs_f32(__p0);
+__ai int32_t vaddlv_s16(int16x4_t __p0) {
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vaddlv_s16((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0 >= __p1);
+__ai uint8_t vaddvq_u8(uint8x16_t __p0) {
+  uint8_t __ret;
+  __ret = (uint8_t) __builtin_neon_vaddvq_u8((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) {
-  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__rev0 >= __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai uint8_t vaddvq_u8(uint8x16_t __p0) {
+  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8_t __ret;
+  __ret = (uint8_t) __builtin_neon_vaddvq_u8((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vcgeq_f64(float64x2_t __p0, float64x2_t __p1) {
-
uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0 >= __p1);
+__ai uint32_t vaddvq_u32(uint32x4_t __p0) {
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vaddvq_u32((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x2_t vcgeq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__rev0 >= __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai uint32_t vaddvq_u32(uint32x4_t __p0) {
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vaddvq_u32((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0 >= __p1);
+__ai uint64_t vaddvq_u64(uint64x2_t __p0) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vaddvq_u64((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__rev0 >= __rev1);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai uint64_t vaddvq_u64(uint64x2_t __p0) {
+  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vaddvq_u64((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vcge_u64(uint64x1_t __p0, uint64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 >= __p1);
+__ai uint16_t vaddvq_u16(uint16x8_t __p0) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vaddvq_u16((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x1_t vcge_u64(uint64x1_t __p0, uint64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 >= __p1);
+__ai uint16_t vaddvq_u16(uint16x8_t __p0) {
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vaddvq_u16((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vcge_f64(float64x1_t __p0, float64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 >= __p1);
+__ai int8_t vaddvq_s8(int8x16_t __p0) {
+  int8_t __ret;
+  __ret = (int8_t) __builtin_neon_vaddvq_s8((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x1_t vcge_f64(float64x1_t __p0, float64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 >= __p1);
+__ai int8_t vaddvq_s8(int8x16_t __p0) {
+  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8_t __ret;
+  __ret = (int8_t) __builtin_neon_vaddvq_s8((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 >= __p1);
+__ai float64_t vaddvq_f64(float64x2_t __p0) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vaddvq_f64((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0 >= __p1);
+__ai float64_t vaddvq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64_t __ret;
+  __ret = (float64_t)
__builtin_neon_vaddvq_f64((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64_t vcged_s64(int64_t __p0, int64_t __p1) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vcged_s64(__p0, __p1);
+__ai float32_t vaddvq_f32(float32x4_t __p0) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vaddvq_f32((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai int64_t vcged_s64(int64_t __p0, int64_t __p1) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vcged_s64(__p0, __p1);
+__ai float32_t vaddvq_f32(float32x4_t __p0) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vaddvq_f32((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vcged_u64(__p0, __p1);
+__ai int32_t vaddvq_s32(int32x4_t __p0) {
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vaddvq_s32((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vcged_u64(__p0, __p1);
+__ai int32_t vaddvq_s32(int32x4_t __p0) {
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vaddvq_s32((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vcged_f64(float64_t __p0, float64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vcged_f64(__p0, __p1);
+__ai int64_t vaddvq_s64(int64x2_t __p0) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vaddvq_s64((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64_t vcged_f64(float64_t __p0, float64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vcged_f64(__p0, __p1);
+__ai int64_t vaddvq_s64(int64x2_t __p0) {
+  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vaddvq_s64((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32_t vcges_f32(float32_t __p0, float32_t __p1) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vcges_f32(__p0, __p1);
+__ai int16_t vaddvq_s16(int16x8_t __p0) {
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vaddvq_s16((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint32_t vcges_f32(float32_t __p0, float32_t __p1) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vcges_f32(__p0, __p1);
+__ai int16_t vaddvq_s16(int16x8_t __p0) {
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vaddvq_s16((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vcgezq_s8(int8x16_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 48);
+__ai uint8_t vaddv_u8(uint8x8_t __p0) {
+  uint8_t __ret;
+  __ret = (uint8_t) __builtin_neon_vaddv_u8((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint8x16_t vcgezq_s8(int8x16_t __p0) {
-  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint8_t vaddv_u8(uint8x8_t __p0) {
+  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8_t
__ret;
+  __ret = (uint8_t) __builtin_neon_vaddv_u8((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vcgezq_f64(float64x2_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 51);
+__ai uint32_t vaddv_u32(uint32x2_t __p0) {
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vaddv_u32((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x2_t vcgezq_f64(float64x2_t __p0) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 51);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai uint32_t vaddv_u32(uint32x2_t __p0) {
+  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vaddv_u32((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vcgezq_f32(float32x4_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 50);
+__ai uint16_t vaddv_u16(uint16x4_t __p0) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vaddv_u16((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint32x4_t vcgezq_f32(float32x4_t __p0) {
-  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai uint16_t vaddv_u16(uint16x4_t __p0) {
+  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vaddv_u16((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vcgezq_s32(int32x4_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 50);
+__ai int8_t vaddv_s8(int8x8_t __p0) {
+  int8_t __ret;
+  __ret = (int8_t) __builtin_neon_vaddv_s8((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint32x4_t vcgezq_s32(int32x4_t __p0) {
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int8_t vaddv_s8(int8x8_t __p0) {
+  int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8_t __ret;
+  __ret = (int8_t) __builtin_neon_vaddv_s8((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vcgezq_s64(int64x2_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 51);
+__ai float32_t vaddv_f32(float32x2_t __p0) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vaddv_f32((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint64x2_t vcgezq_s64(int64x2_t __p0) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __ret;
-  __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 51);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai float32_t vaddv_f32(float32x2_t __p0) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vaddv_f32((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vcgezq_s16(int16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 49);
+__ai int32_t vaddv_s32(int32x2_t __p0) {
+  int32_t __ret;
+  __ret = (int32_t)
__builtin_neon_vaddv_s32((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint16x8_t vcgezq_s16(int16x8_t __p0) {
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32_t vaddv_s32(int32x2_t __p0) {
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vaddv_s32((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vcgez_s8(int8x8_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 16);
+__ai int16_t vaddv_s16(int16x4_t __p0) {
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vaddv_s16((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai uint8x8_t vcgez_s8(int8x8_t __p0) {
-  int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int16_t vaddv_s16(int16x4_t __p0) {
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vaddv_s16((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vcgez_f64(float64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19);
+__ai poly64x1_t vbsl_p64(uint64x1_t __p0, poly64x1_t __p1, poly64x1_t __p2) {
+  poly64x1_t __ret;
+  __ret = (poly64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 6);
   return __ret;
 }
 #else
-__ai uint64x1_t vcgez_f64(float64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19);
+__ai poly64x1_t vbsl_p64(uint64x1_t __p0, poly64x1_t __p1, poly64x1_t __p2) {
+  poly64x1_t __ret;
+  __ret = (poly64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 6);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vcgez_f32(float32x2_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 18);
+__ai poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) {
+  poly64x2_t __ret;
+  __ret = (poly64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 38);
   return __ret;
 }
 #else
-__ai uint32x2_t vcgez_f32(float32x2_t __p0) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 18);
+__ai poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) {
+  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  poly64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+  poly64x2_t __ret;
+  __ret = (poly64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 38);
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vcgez_s32(int32x2_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 18);
+__ai float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0,
(int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #else -__ai uint32x2_t vcgez_s32(int32x2_t __p0) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 18); +__ai float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcgez_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); +__ai float64x1_t vbsl_f64(uint64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } #else -__ai uint64x1_t vcgez_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); +__ai float64x1_t vbsl_f64(uint64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vcgez_s16(int16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 17); +__ai uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else -__ai uint16x4_t vcgez_s16(int16x4_t __p0) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcgezd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgezd_s64(__p0); +__ai uint64x1_t vcage_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else -__ai int64_t vcgezd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgezd_s64(__p0); +__ai uint64x1_t vcage_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcgezd_f64(float64_t __p0) { +__ai uint64_t vcaged_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgezd_f64(__p0); + __ret = (uint64_t) __builtin_neon_vcaged_f64(__p0, __p1); return __ret; } #else -__ai uint64_t vcgezd_f64(float64_t __p0) { +__ai uint64_t vcaged_f64(float64_t __p0, float64_t __p1) { 
uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgezd_f64(__p0); + __ret = (uint64_t) __builtin_neon_vcaged_f64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcgezs_f32(float32_t __p0) { +__ai uint32_t vcages_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgezs_f32(__p0); + __ret = (uint32_t) __builtin_neon_vcages_f32(__p0, __p1); return __ret; } #else -__ai uint32_t vcgezs_f32(float32_t __p0) { +__ai uint32_t vcages_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgezs_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 > __p1); - return __ret; -} -#else -__ai uint64x2_t vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = (uint32_t) __builtin_neon_vcages_f32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcgtq_f64(float64x2_t __p0, float64x2_t __p1) { +__ai uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 > __p1); + __ret = (uint64x2_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else -__ai uint64x2_t vcgtq_f64(float64x2_t __p0, float64x2_t __p1) { +__ai uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 > __rev1); + __ret = (uint64x2_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 > __p1); +__ai uint64x1_t vcagt_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else -__ai uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint64x1_t vcagt_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcgt_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); +__ai uint64_t vcagtd_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcagtd_f64(__p0, __p1); return __ret; } #else -__ai uint64x1_t vcgt_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); +__ai uint64_t vcagtd_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcagtd_f64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcgt_f64(float64x1_t __p0, float64x1_t __p1) { 
- uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); +__ai uint32_t vcagts_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcagts_f32(__p0, __p1); return __ret; } #else -__ai uint64x1_t vcgt_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); +__ai uint32_t vcagts_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcagts_f32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); +__ai uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else -__ai uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); +__ai uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcgtd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgtd_s64(__p0, __p1); +__ai uint64x1_t vcale_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else -__ai int64_t vcgtd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgtd_s64(__p0, __p1); +__ai uint64x1_t vcale_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vcaled_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_u64(__p0, __p1); + __ret = (uint64_t) __builtin_neon_vcaled_f64(__p0, __p1); return __ret; } #else -__ai uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vcaled_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_u64(__p0, __p1); + __ret = (uint64_t) __builtin_neon_vcaled_f64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcgtd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_f64(__p0, __p1); +__ai uint32_t vcales_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcales_f32(__p0, __p1); return __ret; } #else -__ai uint64_t vcgtd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_f64(__p0, __p1); +__ai uint32_t vcales_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcales_f32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcgts_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgts_f32(__p0, __p1); +__ai uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return 
__ret; } #else -__ai uint32_t vcgts_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgts_f32(__p0, __p1); +__ai uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vcgtzq_s8(int8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 48); +__ai uint64x1_t vcalt_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else -__ai uint8x16_t vcgtzq_s8(int8x16_t __p0) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint64x1_t vcalt_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcgtzq_f64(float64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 51); +__ai uint64_t vcaltd_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcaltd_f64(__p0, __p1); return __ret; } #else -__ai uint64x2_t vcgtzq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint64_t vcaltd_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcaltd_f64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vcgtzq_f32(float32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 50); +__ai uint32_t vcalts_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcalts_f32(__p0, __p1); return __ret; } #else -__ai uint32x4_t vcgtzq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai uint32_t vcalts_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcalts_f32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vcgtzq_s32(int32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 50); +__ai uint64x1_t vceq_p64(poly64x1_t __p0, poly64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #else -__ai uint32x4_t vcgtzq_s32(int32x4_t __p0) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai 
uint64x1_t vceq_p64(poly64x1_t __p0, poly64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcgtzq_s64(int64x2_t __p0) { +__ai uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 51); + __ret = (uint64x2_t)(__p0 == __p1); return __ret; } #else -__ai uint64x2_t vcgtzq_s64(int64x2_t __p0) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); +__ai uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) { + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 51); + __ret = (uint64x2_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vcgtzq_s16(int16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 49); +__ai uint64x2_t vceqq_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 == __p1); return __ret; } #else -__ai uint16x8_t vcgtzq_s16(int16x8_t __p0) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint64x2_t vceqq_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vcgtz_s8(int8x8_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 16); +__ai uint64x2_t vceqq_f64(float64x2_t __p0, float64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 == __p1); return __ret; } #else -__ai uint8x8_t vcgtz_s8(int8x8_t __p0) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint64x2_t vceqq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcgtz_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); +__ai uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 == __p1); return __ret; } #else -__ai uint64x1_t vcgtz_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); +__ai uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 == __rev1); + __ret 
= __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vcgtz_f32(float32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 18); +__ai uint64x1_t vceq_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #else -__ai uint32x2_t vcgtz_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint64x1_t vceq_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vcgtz_s32(int32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 18); +__ai uint64x1_t vceq_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #else -__ai uint32x2_t vcgtz_s32(int32x2_t __p0) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint64x1_t vceq_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcgtz_s64(int64x1_t __p0) { +__ai uint64x1_t vceq_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #else -__ai uint64x1_t vcgtz_s64(int64x1_t __p0) { +__ai uint64x1_t vceq_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vcgtz_s16(int16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 17); +__ai uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1); return __ret; } #else -__ai uint16x4_t vcgtz_s16(int16x4_t __p0) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcgtzd_s64(int64_t __p0) { +__ai int64_t vceqd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgtzd_s64(__p0); + __ret = (int64_t) __builtin_neon_vceqd_s64(__p0, __p1); return __ret; } #else -__ai int64_t vcgtzd_s64(int64_t __p0) { +__ai int64_t vceqd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgtzd_s64(__p0); + __ret = (int64_t) __builtin_neon_vceqd_s64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcgtzd_f64(float64_t __p0) { +__ai uint64_t vceqd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtzd_f64(__p0); + __ret = (uint64_t) __builtin_neon_vceqd_f64(__p0, __p1); return __ret; } #else -__ai 
uint64_t vcgtzd_f64(float64_t __p0) { +__ai uint64_t vceqd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtzd_f64(__p0); + __ret = (uint64_t) __builtin_neon_vceqd_f64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcgtzs_f32(float32_t __p0) { +__ai uint32_t vceqs_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgtzs_f32(__p0); + __ret = (uint32_t) __builtin_neon_vceqs_f32(__p0, __p1); return __ret; } #else -__ai uint32_t vcgtzs_f32(float32_t __p0) { +__ai uint32_t vceqs_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgtzs_f32(__p0); + __ret = (uint32_t) __builtin_neon_vceqs_f32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 <= __p1); +__ai uint8x8_t vceqz_p8(poly8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); return __ret; } #else -__ai uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint8x8_t vceqz_p8(poly8x8_t __p0) { + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcleq_f64(float64x2_t __p0, float64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 <= __p1); +__ai uint64x1_t vceqz_p64(poly64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #else -__ai uint64x2_t vcleq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint64x1_t vceqz_p64(poly64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcleq_s64(int64x2_t __p0, int64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 <= __p1); +__ai uint16x4_t vceqz_p16(poly16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17); return __ret; } #else -__ai uint64x2_t vcleq_s64(int64x2_t __p0, int64x2_t __p1) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint16x4_t vceqz_p16(poly16x4_t __p0) { + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcle_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = 
(uint64x1_t)(__p0 <= __p1); +__ai uint8x16_t vceqzq_p8(poly8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); return __ret; } #else -__ai uint64x1_t vcle_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); +__ai uint8x16_t vceqzq_p8(poly8x16_t __p0) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcle_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); +__ai uint64x2_t vceqzq_p64(poly64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai uint64x1_t vcle_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); +__ai uint64x2_t vceqzq_p64(poly64x2_t __p0) { + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcle_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); +__ai uint16x8_t vceqzq_p16(poly16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49); return __ret; } #else -__ai uint64x1_t vcle_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); +__ai uint16x8_t vceqzq_p16(poly16x8_t __p0) { + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); +__ai uint8x16_t vceqzq_u8(uint8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); return __ret; } #else -__ai uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); +__ai uint8x16_t vceqzq_u8(uint8x16_t __p0) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcled_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcled_s64(__p0, __p1); +__ai uint32x4_t vceqzq_u32(uint32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai int64_t vcled_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcled_s64(__p0, __p1); +__ai uint32x4_t vceqzq_u32(uint32x4_t __p0) { + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + 
uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcled_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_f64(__p0, __p1); +__ai uint64x2_t vceqzq_u64(uint64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai uint64_t vcled_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_f64(__p0, __p1); +__ai uint64x2_t vceqzq_u64(uint64x2_t __p0) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcles_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcles_f32(__p0, __p1); +__ai uint16x8_t vceqzq_u16(uint16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49); return __ret; } #else -__ai uint32_t vcles_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcles_f32(__p0, __p1); +__ai uint16x8_t vceqzq_u16(uint16x8_t __p0) { + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vclezq_s8(int8x16_t __p0) { +__ai uint8x16_t vceqzq_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 48); + __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); return __ret; } #else -__ai uint8x16_t vclezq_s8(int8x16_t __p0) { +__ai uint8x16_t vceqzq_s8(int8x16_t __p0) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 48); + __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vclezq_f64(float64x2_t __p0) { +__ai uint64x2_t vceqzq_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 51); + __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai uint64x2_t vclezq_f64(float64x2_t __p0) { +__ai uint64x2_t vceqzq_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 51); + __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vclezq_f32(float32x4_t __p0) { +__ai uint32x4_t vceqzq_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 50); + __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai uint32x4_t vclezq_f32(float32x4_t __p0) { +__ai uint32x4_t 
vceqzq_f32(float32x4_t __p0) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 50); + __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vclezq_s32(int32x4_t __p0) { +__ai uint32x4_t vceqzq_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 50); + __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai uint32x4_t vclezq_s32(int32x4_t __p0) { +__ai uint32x4_t vceqzq_s32(int32x4_t __p0) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 50); + __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vclezq_s64(int64x2_t __p0) { +__ai uint64x2_t vceqzq_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 51); + __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai uint64x2_t vclezq_s64(int64x2_t __p0) { +__ai uint64x2_t vceqzq_s64(int64x2_t __p0) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 51); + __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vclezq_s16(int16x8_t __p0) { +__ai uint16x8_t vceqzq_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 49); + __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49); return __ret; } #else -__ai uint16x8_t vclezq_s16(int16x8_t __p0) { +__ai uint16x8_t vceqzq_s16(int16x8_t __p0) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 49); + __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vclez_s8(int8x8_t __p0) { +__ai uint8x8_t vceqz_u8(uint8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vclez_v((int8x8_t)__p0, 16); + __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); return __ret; } #else -__ai uint8x8_t vclez_s8(int8x8_t __p0) { +__ai uint8x8_t vceqz_u8(uint8x8_t __p0) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32x2_t vceqz_u32(uint32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); + return __ret; +} +#else +__ai uint32x2_t vceqz_u32(uint32x2_t __p0) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); + __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint64x1_t vceqz_u64(uint64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); + return __ret; +} +#else +__ai uint64x1_t vceqz_u64(uint64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x4_t vceqz_u16(uint16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17); + return __ret; +} +#else +__ai uint16x4_t vceqz_u16(uint16x4_t __p0) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint8x8_t vceqz_s8(int8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); + return __ret; +} +#else +__ai uint8x8_t vceqz_s8(int8x8_t __p0) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 16); + __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vclez_f64(float64x1_t __p0) { +__ai uint64x1_t vceqz_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #else -__ai uint64x1_t vclez_f64(float64x1_t __p0) { +__ai uint64x1_t vceqz_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vclez_f32(float32x2_t __p0) { +__ai uint32x2_t vceqz_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__p0, 18); + __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); return __ret; } #else -__ai uint32x2_t vclez_f32(float32x2_t __p0) { +__ai uint32x2_t vceqz_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 18); + __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vclez_s32(int32x2_t __p0) { +__ai uint32x2_t vceqz_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__p0, 18); + __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); return __ret; } #else -__ai uint32x2_t vclez_s32(int32x2_t __p0) { +__ai uint32x2_t vceqz_s32(int32x2_t __p0) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 18); + __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vclez_s64(int64x1_t __p0) { +__ai uint64x1_t vceqz_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) 
__builtin_neon_vclez_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #else -__ai uint64x1_t vclez_s64(int64x1_t __p0) { +__ai uint64x1_t vceqz_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vclez_s16(int16x4_t __p0) { +__ai uint16x4_t vceqz_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__p0, 17); + __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17); return __ret; } #else -__ai uint16x4_t vclez_s16(int16x4_t __p0) { +__ai uint16x4_t vceqz_s16(int16x4_t __p0) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 17); + __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vclezd_s64(int64_t __p0) { +__ai uint64_t vceqzd_u64(uint64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0); + return __ret; +} +#else +__ai uint64_t vceqzd_u64(uint64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int64_t vceqzd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vclezd_s64(__p0); + __ret = (int64_t) __builtin_neon_vceqzd_s64(__p0); return __ret; } #else -__ai int64_t vclezd_s64(int64_t __p0) { +__ai int64_t vceqzd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vclezd_s64(__p0); + __ret = (int64_t) __builtin_neon_vceqzd_s64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vclezd_f64(float64_t __p0) { +__ai uint64_t vceqzd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vclezd_f64(__p0); + __ret = (uint64_t) __builtin_neon_vceqzd_f64(__p0); return __ret; } #else -__ai uint64_t vclezd_f64(float64_t __p0) { +__ai uint64_t vceqzd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vclezd_f64(__p0); + __ret = (uint64_t) __builtin_neon_vceqzd_f64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vclezs_f32(float32_t __p0) { +__ai uint32_t vceqzs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vclezs_f32(__p0); + __ret = (uint32_t) __builtin_neon_vceqzs_f32(__p0); return __ret; } #else -__ai uint32_t vclezs_f32(float32_t __p0) { +__ai uint32_t vceqzs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vclezs_f32(__p0); + __ret = (uint32_t) __builtin_neon_vceqzs_f32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { +__ai uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 < __p1); + __ret = (uint64x2_t)(__p0 >= __p1); return __ret; } #else -__ai uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { +__ai uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 < __rev1); + __ret = (uint64x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); 
return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcltq_f64(float64x2_t __p0, float64x2_t __p1) { +__ai uint64x2_t vcgeq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 < __p1); + __ret = (uint64x2_t)(__p0 >= __p1); return __ret; } #else -__ai uint64x2_t vcltq_f64(float64x2_t __p0, float64x2_t __p1) { +__ai uint64x2_t vcgeq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 < __rev1); + __ret = (uint64x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 < __p1); + __ret = (uint64x2_t)(__p0 >= __p1); return __ret; } #else -__ai uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t)(__rev0 < __rev1); + __ret = (uint64x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vclt_u64(uint64x1_t __p0, uint64x1_t __p1) { +__ai uint64x1_t vcge_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); + __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } #else -__ai uint64x1_t vclt_u64(uint64x1_t __p0, uint64x1_t __p1) { +__ai uint64x1_t vcge_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); + __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vclt_f64(float64x1_t __p0, float64x1_t __p1) { +__ai uint64x1_t vcge_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); + __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } #else -__ai uint64x1_t vclt_f64(float64x1_t __p0, float64x1_t __p1) { +__ai uint64x1_t vcge_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); + __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vclt_s64(int64x1_t __p0, int64x1_t __p1) { +__ai uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); + __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } #else -__ai uint64x1_t vclt_s64(int64x1_t __p0, int64x1_t __p1) { +__ai uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); + __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); +__ai int64_t vcged_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcged_s64(__p0, __p1); return __ret; } #else -__ai uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); +__ai int64_t vcged_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcged_s64(__p0, __p1); return __ret; } #endif 
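(Editor's aside, not part of the diff: the hunks above and below are the mechanical clang-7 to clang-8 regeneration of arm_neon.h. The generated pattern is stable throughout: every intrinsic gets a little-endian body that invokes the builtin directly, and a big-endian #else body that reverses lane order with __builtin_shufflevector before the call and again on the result, so lane numbering matches the architectural view on either endianness. For orientation, here is a minimal sketch of a few of the intrinsics touched in these hunks in use; it assumes an AArch64 little-endian target with this header on the include path, and the main() harness is a hypothetical illustration, not anything taken from this change:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  /* vceqzq_u64: lane-wise compare against zero; 0 == 0 in every
     lane, so the result mask is all-ones. */
  uint64x2_t mask = vceqzq_u64(vdupq_n_u64(0));
  /* vbslq_f64: bitwise select, taking bits of a where the mask bit
     is 1 and bits of b where it is 0; here that yields a unchanged. */
  float64x2_t a = vdupq_n_f64(1.0), b = vdupq_n_f64(-1.0);
  float64x2_t sel = vbslq_f64(mask, a, b);
  /* vcgeq_f64: lane-wise >=; true lanes come back as all-ones. */
  uint64x2_t ge = vcgeq_f64(sel, vdupq_n_f64(0.0));
  /* vaddv_u32: horizontal add across lanes, here 3 + 4 = 7. */
  uint32x2_t v = vset_lane_u32(4, vdup_n_u32(3), 1);
  printf("ge0=%llx sum=%u\n",
         (unsigned long long)vgetq_lane_u64(ge, 0), vaddv_u32(v));
  return 0;
}

The same calls compile unchanged on a big-endian AArch64 target; the #else bodies in the hunks exist precisely so the header can hide the lane-order difference from user code.)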
#ifdef __LITTLE_ENDIAN__ -__ai int64_t vcltd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcltd_s64(__p0, __p1); +__ai uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcged_u64(__p0, __p1); return __ret; } #else -__ai int64_t vcltd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcltd_s64(__p0, __p1); +__ai uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcged_u64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { +__ai uint64_t vcged_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_f64(__p0, __p1); + __ret = (uint64_t) __builtin_neon_vcged_f64(__p0, __p1); return __ret; } #else -__ai uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { +__ai uint64_t vcged_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_f64(__p0, __p1); + __ret = (uint64_t) __builtin_neon_vcged_f64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vclts_f32(float32_t __p0, float32_t __p1) { +__ai uint32_t vcges_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vclts_f32(__p0, __p1); + __ret = (uint32_t) __builtin_neon_vcges_f32(__p0, __p1); return __ret; } #else -__ai uint32_t vclts_f32(float32_t __p0, float32_t __p1) { +__ai uint32_t vcges_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vclts_f32(__p0, __p1); + __ret = (uint32_t) __builtin_neon_vcges_f32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vcltzq_s8(int8x16_t __p0) { +__ai uint8x16_t vcgezq_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 48); + __ret = (uint8x16_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 48); return __ret; } #else -__ai uint8x16_t vcltzq_s8(int8x16_t __p0) { +__ai uint8x16_t vcgezq_s8(int8x16_t __p0) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 48); + __ret = (uint8x16_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcltzq_f64(float64x2_t __p0) { +__ai uint64x2_t vcgezq_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 51); + __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai uint64x2_t vcltzq_f64(float64x2_t __p0) { +__ai uint64x2_t vcgezq_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 51); + __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vcltzq_f32(float32x4_t __p0) { +__ai uint32x4_t vcgezq_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 50); + __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai uint32x4_t 
vcltzq_f32(float32x4_t __p0) { +__ai uint32x4_t vcgezq_f32(float32x4_t __p0) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 50); + __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vcltzq_s32(int32x4_t __p0) { +__ai uint32x4_t vcgezq_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 50); + __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai uint32x4_t vcltzq_s32(int32x4_t __p0) { +__ai uint32x4_t vcgezq_s32(int32x4_t __p0) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 50); + __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcltzq_s64(int64x2_t __p0) { +__ai uint64x2_t vcgezq_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 51); + __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai uint64x2_t vcltzq_s64(int64x2_t __p0) { +__ai uint64x2_t vcgezq_s64(int64x2_t __p0) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 51); + __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vcltzq_s16(int16x8_t __p0) { +__ai uint16x8_t vcgezq_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 49); + __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 49); return __ret; } #else -__ai uint16x8_t vcltzq_s16(int16x8_t __p0) { +__ai uint16x8_t vcgezq_s16(int16x8_t __p0) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 49); + __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vcltz_s8(int8x8_t __p0) { +__ai uint8x8_t vcgez_s8(int8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 16); + __ret = (uint8x8_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 16); return __ret; } #else -__ai uint8x8_t vcltz_s8(int8x8_t __p0) { +__ai uint8x8_t vcgez_s8(int8x8_t __p0) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 16); + __ret = (uint8x8_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcltz_f64(float64x1_t __p0) { +__ai uint64x1_t vcgez_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); return __ret; } #else -__ai uint64x1_t 
vcltz_f64(float64x1_t __p0) { +__ai uint64x1_t vcgez_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vcltz_f32(float32x2_t __p0) { +__ai uint32x2_t vcgez_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 18); + __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 18); return __ret; } #else -__ai uint32x2_t vcltz_f32(float32x2_t __p0) { +__ai uint32x2_t vcgez_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 18); + __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vcltz_s32(int32x2_t __p0) { +__ai uint32x2_t vcgez_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 18); + __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 18); return __ret; } #else -__ai uint32x2_t vcltz_s32(int32x2_t __p0) { +__ai uint32x2_t vcgez_s32(int32x2_t __p0) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 18); + __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcltz_s64(int64x1_t __p0) { +__ai uint64x1_t vcgez_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); return __ret; } #else -__ai uint64x1_t vcltz_s64(int64x1_t __p0) { +__ai uint64x1_t vcgez_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vcltz_s16(int16x4_t __p0) { +__ai uint16x4_t vcgez_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 17); + __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 17); return __ret; } #else -__ai uint16x4_t vcltz_s16(int16x4_t __p0) { +__ai uint16x4_t vcgez_s16(int16x4_t __p0) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 17); + __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcltzd_s64(int64_t __p0) { +__ai int64_t vcgezd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcltzd_s64(__p0); + __ret = (int64_t) __builtin_neon_vcgezd_s64(__p0); return __ret; } #else -__ai int64_t vcltzd_s64(int64_t __p0) { +__ai int64_t vcgezd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcltzd_s64(__p0); + __ret = (int64_t) __builtin_neon_vcgezd_s64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcltzd_f64(float64_t __p0) { +__ai uint64_t vcgezd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltzd_f64(__p0); + __ret = (uint64_t) 
__builtin_neon_vcgezd_f64(__p0); return __ret; } #else -__ai uint64_t vcltzd_f64(float64_t __p0) { +__ai uint64_t vcgezd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltzd_f64(__p0); + __ret = (uint64_t) __builtin_neon_vcgezd_f64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcltzs_f32(float32_t __p0) { +__ai uint32_t vcgezs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcltzs_f32(__p0); + __ret = (uint32_t) __builtin_neon_vcgezs_f32(__p0); return __ret; } #else -__ai uint32_t vcltzs_f32(float32_t __p0) { +__ai uint32_t vcgezs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcltzs_f32(__p0); + __ret = (uint32_t) __builtin_neon_vcgezs_f32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vcombine_p64(poly64x1_t __p0, poly64x1_t __p1) { - poly64x2_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 0, 1); +__ai uint64x2_t vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 > __p1); return __ret; } #else -__ai poly64x2_t vcombine_p64(poly64x1_t __p0, poly64x1_t __p1) { - poly64x2_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 0, 1); +__ai uint64x2_t vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { - float64x2_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 0, 1); +__ai uint64x2_t vcgtq_f64(float64x2_t __p0, float64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 > __p1); return __ret; } #else -__ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { - float64x2_t __ret; - __ret = __builtin_shufflevector(__p0, __p1, 0, 1); +__ai uint64x2_t vcgtq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p8(__p0_12, __p1_12, __p2_12, __p3_12) __extension__ ({ \ - poly8x16_t __s0_12 = __p0_12; \ - poly8x8_t __s2_12 = __p2_12; \ - poly8x16_t __ret_12; \ - __ret_12 = vsetq_lane_p8(vget_lane_p8(__s2_12, __p3_12), __s0_12, __p1_12); \ - __ret_12; \ -}) -#else -#define vcopyq_lane_p8(__p0_13, __p1_13, __p2_13, __p3_13) __extension__ ({ \ - poly8x16_t __s0_13 = __p0_13; \ - poly8x8_t __s2_13 = __p2_13; \ - poly8x16_t __rev0_13; __rev0_13 = __builtin_shufflevector(__s0_13, __s0_13, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev2_13; __rev2_13 = __builtin_shufflevector(__s2_13, __s2_13, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret_13; \ - __ret_13 = __noswap_vsetq_lane_p8(__noswap_vget_lane_p8(__rev2_13, __p3_13), __rev0_13, __p1_13); \ - __ret_13 = __builtin_shufflevector(__ret_13, __ret_13, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_13; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p16(__p0_14, __p1_14, __p2_14, __p3_14) __extension__ ({ \ - poly16x8_t __s0_14 = __p0_14; \ - poly16x4_t __s2_14 = __p2_14; \ - poly16x8_t __ret_14; \ - __ret_14 = 
vsetq_lane_p16(vget_lane_p16(__s2_14, __p3_14), __s0_14, __p1_14); \ - __ret_14; \ -}) -#else -#define vcopyq_lane_p16(__p0_15, __p1_15, __p2_15, __p3_15) __extension__ ({ \ - poly16x8_t __s0_15 = __p0_15; \ - poly16x4_t __s2_15 = __p2_15; \ - poly16x8_t __rev0_15; __rev0_15 = __builtin_shufflevector(__s0_15, __s0_15, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x4_t __rev2_15; __rev2_15 = __builtin_shufflevector(__s2_15, __s2_15, 3, 2, 1, 0); \ - poly16x8_t __ret_15; \ - __ret_15 = __noswap_vsetq_lane_p16(__noswap_vget_lane_p16(__rev2_15, __p3_15), __rev0_15, __p1_15); \ - __ret_15 = __builtin_shufflevector(__ret_15, __ret_15, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_15; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u8(__p0_16, __p1_16, __p2_16, __p3_16) __extension__ ({ \ - uint8x16_t __s0_16 = __p0_16; \ - uint8x8_t __s2_16 = __p2_16; \ - uint8x16_t __ret_16; \ - __ret_16 = vsetq_lane_u8(vget_lane_u8(__s2_16, __p3_16), __s0_16, __p1_16); \ - __ret_16; \ -}) -#else -#define vcopyq_lane_u8(__p0_17, __p1_17, __p2_17, __p3_17) __extension__ ({ \ - uint8x16_t __s0_17 = __p0_17; \ - uint8x8_t __s2_17 = __p2_17; \ - uint8x16_t __rev0_17; __rev0_17 = __builtin_shufflevector(__s0_17, __s0_17, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_17; __rev2_17 = __builtin_shufflevector(__s2_17, __s2_17, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_17; \ - __ret_17 = __noswap_vsetq_lane_u8(__noswap_vget_lane_u8(__rev2_17, __p3_17), __rev0_17, __p1_17); \ - __ret_17 = __builtin_shufflevector(__ret_17, __ret_17, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_17; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u32(__p0_18, __p1_18, __p2_18, __p3_18) __extension__ ({ \ - uint32x4_t __s0_18 = __p0_18; \ - uint32x2_t __s2_18 = __p2_18; \ - uint32x4_t __ret_18; \ - __ret_18 = vsetq_lane_u32(vget_lane_u32(__s2_18, __p3_18), __s0_18, __p1_18); \ - __ret_18; \ -}) -#else -#define vcopyq_lane_u32(__p0_19, __p1_19, __p2_19, __p3_19) __extension__ ({ \ - uint32x4_t __s0_19 = __p0_19; \ - uint32x2_t __s2_19 = __p2_19; \ - uint32x4_t __rev0_19; __rev0_19 = __builtin_shufflevector(__s0_19, __s0_19, 3, 2, 1, 0); \ - uint32x2_t __rev2_19; __rev2_19 = __builtin_shufflevector(__s2_19, __s2_19, 1, 0); \ - uint32x4_t __ret_19; \ - __ret_19 = __noswap_vsetq_lane_u32(__noswap_vget_lane_u32(__rev2_19, __p3_19), __rev0_19, __p1_19); \ - __ret_19 = __builtin_shufflevector(__ret_19, __ret_19, 3, 2, 1, 0); \ - __ret_19; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u64(__p0_20, __p1_20, __p2_20, __p3_20) __extension__ ({ \ - uint64x2_t __s0_20 = __p0_20; \ - uint64x1_t __s2_20 = __p2_20; \ - uint64x2_t __ret_20; \ - __ret_20 = vsetq_lane_u64(vget_lane_u64(__s2_20, __p3_20), __s0_20, __p1_20); \ - __ret_20; \ -}) -#else -#define vcopyq_lane_u64(__p0_21, __p1_21, __p2_21, __p3_21) __extension__ ({ \ - uint64x2_t __s0_21 = __p0_21; \ - uint64x1_t __s2_21 = __p2_21; \ - uint64x2_t __rev0_21; __rev0_21 = __builtin_shufflevector(__s0_21, __s0_21, 1, 0); \ - uint64x2_t __ret_21; \ - __ret_21 = __noswap_vsetq_lane_u64(__noswap_vget_lane_u64(__s2_21, __p3_21), __rev0_21, __p1_21); \ - __ret_21 = __builtin_shufflevector(__ret_21, __ret_21, 1, 0); \ - __ret_21; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u16(__p0_22, __p1_22, __p2_22, __p3_22) __extension__ ({ \ - uint16x8_t __s0_22 = __p0_22; \ - uint16x4_t __s2_22 = __p2_22; \ - uint16x8_t __ret_22; \ - __ret_22 = vsetq_lane_u16(vget_lane_u16(__s2_22, __p3_22), __s0_22, __p1_22); \ - __ret_22; 
\ -}) -#else -#define vcopyq_lane_u16(__p0_23, __p1_23, __p2_23, __p3_23) __extension__ ({ \ - uint16x8_t __s0_23 = __p0_23; \ - uint16x4_t __s2_23 = __p2_23; \ - uint16x8_t __rev0_23; __rev0_23 = __builtin_shufflevector(__s0_23, __s0_23, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_23; __rev2_23 = __builtin_shufflevector(__s2_23, __s2_23, 3, 2, 1, 0); \ - uint16x8_t __ret_23; \ - __ret_23 = __noswap_vsetq_lane_u16(__noswap_vget_lane_u16(__rev2_23, __p3_23), __rev0_23, __p1_23); \ - __ret_23 = __builtin_shufflevector(__ret_23, __ret_23, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_23; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s8(__p0_24, __p1_24, __p2_24, __p3_24) __extension__ ({ \ - int8x16_t __s0_24 = __p0_24; \ - int8x8_t __s2_24 = __p2_24; \ - int8x16_t __ret_24; \ - __ret_24 = vsetq_lane_s8(vget_lane_s8(__s2_24, __p3_24), __s0_24, __p1_24); \ - __ret_24; \ -}) -#else -#define vcopyq_lane_s8(__p0_25, __p1_25, __p2_25, __p3_25) __extension__ ({ \ - int8x16_t __s0_25 = __p0_25; \ - int8x8_t __s2_25 = __p2_25; \ - int8x16_t __rev0_25; __rev0_25 = __builtin_shufflevector(__s0_25, __s0_25, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_25; __rev2_25 = __builtin_shufflevector(__s2_25, __s2_25, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_25; \ - __ret_25 = __noswap_vsetq_lane_s8(__noswap_vget_lane_s8(__rev2_25, __p3_25), __rev0_25, __p1_25); \ - __ret_25 = __builtin_shufflevector(__ret_25, __ret_25, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_25; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_f32(__p0_26, __p1_26, __p2_26, __p3_26) __extension__ ({ \ - float32x4_t __s0_26 = __p0_26; \ - float32x2_t __s2_26 = __p2_26; \ - float32x4_t __ret_26; \ - __ret_26 = vsetq_lane_f32(vget_lane_f32(__s2_26, __p3_26), __s0_26, __p1_26); \ - __ret_26; \ -}) -#else -#define vcopyq_lane_f32(__p0_27, __p1_27, __p2_27, __p3_27) __extension__ ({ \ - float32x4_t __s0_27 = __p0_27; \ - float32x2_t __s2_27 = __p2_27; \ - float32x4_t __rev0_27; __rev0_27 = __builtin_shufflevector(__s0_27, __s0_27, 3, 2, 1, 0); \ - float32x2_t __rev2_27; __rev2_27 = __builtin_shufflevector(__s2_27, __s2_27, 1, 0); \ - float32x4_t __ret_27; \ - __ret_27 = __noswap_vsetq_lane_f32(__noswap_vget_lane_f32(__rev2_27, __p3_27), __rev0_27, __p1_27); \ - __ret_27 = __builtin_shufflevector(__ret_27, __ret_27, 3, 2, 1, 0); \ - __ret_27; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s32(__p0_28, __p1_28, __p2_28, __p3_28) __extension__ ({ \ - int32x4_t __s0_28 = __p0_28; \ - int32x2_t __s2_28 = __p2_28; \ - int32x4_t __ret_28; \ - __ret_28 = vsetq_lane_s32(vget_lane_s32(__s2_28, __p3_28), __s0_28, __p1_28); \ - __ret_28; \ -}) -#else -#define vcopyq_lane_s32(__p0_29, __p1_29, __p2_29, __p3_29) __extension__ ({ \ - int32x4_t __s0_29 = __p0_29; \ - int32x2_t __s2_29 = __p2_29; \ - int32x4_t __rev0_29; __rev0_29 = __builtin_shufflevector(__s0_29, __s0_29, 3, 2, 1, 0); \ - int32x2_t __rev2_29; __rev2_29 = __builtin_shufflevector(__s2_29, __s2_29, 1, 0); \ - int32x4_t __ret_29; \ - __ret_29 = __noswap_vsetq_lane_s32(__noswap_vget_lane_s32(__rev2_29, __p3_29), __rev0_29, __p1_29); \ - __ret_29 = __builtin_shufflevector(__ret_29, __ret_29, 3, 2, 1, 0); \ - __ret_29; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s64(__p0_30, __p1_30, __p2_30, __p3_30) __extension__ ({ \ - int64x2_t __s0_30 = __p0_30; \ - int64x1_t __s2_30 = __p2_30; \ - int64x2_t __ret_30; \ - __ret_30 = vsetq_lane_s64(vget_lane_s64(__s2_30, __p3_30), __s0_30, __p1_30); \ - 
__ret_30; \ -}) -#else -#define vcopyq_lane_s64(__p0_31, __p1_31, __p2_31, __p3_31) __extension__ ({ \ - int64x2_t __s0_31 = __p0_31; \ - int64x1_t __s2_31 = __p2_31; \ - int64x2_t __rev0_31; __rev0_31 = __builtin_shufflevector(__s0_31, __s0_31, 1, 0); \ - int64x2_t __ret_31; \ - __ret_31 = __noswap_vsetq_lane_s64(__noswap_vget_lane_s64(__s2_31, __p3_31), __rev0_31, __p1_31); \ - __ret_31 = __builtin_shufflevector(__ret_31, __ret_31, 1, 0); \ - __ret_31; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s16(__p0_32, __p1_32, __p2_32, __p3_32) __extension__ ({ \ - int16x8_t __s0_32 = __p0_32; \ - int16x4_t __s2_32 = __p2_32; \ - int16x8_t __ret_32; \ - __ret_32 = vsetq_lane_s16(vget_lane_s16(__s2_32, __p3_32), __s0_32, __p1_32); \ - __ret_32; \ -}) -#else -#define vcopyq_lane_s16(__p0_33, __p1_33, __p2_33, __p3_33) __extension__ ({ \ - int16x8_t __s0_33 = __p0_33; \ - int16x4_t __s2_33 = __p2_33; \ - int16x8_t __rev0_33; __rev0_33 = __builtin_shufflevector(__s0_33, __s0_33, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_33; __rev2_33 = __builtin_shufflevector(__s2_33, __s2_33, 3, 2, 1, 0); \ - int16x8_t __ret_33; \ - __ret_33 = __noswap_vsetq_lane_s16(__noswap_vget_lane_s16(__rev2_33, __p3_33), __rev0_33, __p1_33); \ - __ret_33 = __builtin_shufflevector(__ret_33, __ret_33, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_33; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_p8(__p0_34, __p1_34, __p2_34, __p3_34) __extension__ ({ \ - poly8x8_t __s0_34 = __p0_34; \ - poly8x8_t __s2_34 = __p2_34; \ - poly8x8_t __ret_34; \ - __ret_34 = vset_lane_p8(vget_lane_p8(__s2_34, __p3_34), __s0_34, __p1_34); \ - __ret_34; \ -}) -#else -#define vcopy_lane_p8(__p0_35, __p1_35, __p2_35, __p3_35) __extension__ ({ \ - poly8x8_t __s0_35 = __p0_35; \ - poly8x8_t __s2_35 = __p2_35; \ - poly8x8_t __rev0_35; __rev0_35 = __builtin_shufflevector(__s0_35, __s0_35, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev2_35; __rev2_35 = __builtin_shufflevector(__s2_35, __s2_35, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret_35; \ - __ret_35 = __noswap_vset_lane_p8(__noswap_vget_lane_p8(__rev2_35, __p3_35), __rev0_35, __p1_35); \ - __ret_35 = __builtin_shufflevector(__ret_35, __ret_35, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_35; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_p16(__p0_36, __p1_36, __p2_36, __p3_36) __extension__ ({ \ - poly16x4_t __s0_36 = __p0_36; \ - poly16x4_t __s2_36 = __p2_36; \ - poly16x4_t __ret_36; \ - __ret_36 = vset_lane_p16(vget_lane_p16(__s2_36, __p3_36), __s0_36, __p1_36); \ - __ret_36; \ -}) -#else -#define vcopy_lane_p16(__p0_37, __p1_37, __p2_37, __p3_37) __extension__ ({ \ - poly16x4_t __s0_37 = __p0_37; \ - poly16x4_t __s2_37 = __p2_37; \ - poly16x4_t __rev0_37; __rev0_37 = __builtin_shufflevector(__s0_37, __s0_37, 3, 2, 1, 0); \ - poly16x4_t __rev2_37; __rev2_37 = __builtin_shufflevector(__s2_37, __s2_37, 3, 2, 1, 0); \ - poly16x4_t __ret_37; \ - __ret_37 = __noswap_vset_lane_p16(__noswap_vget_lane_p16(__rev2_37, __p3_37), __rev0_37, __p1_37); \ - __ret_37 = __builtin_shufflevector(__ret_37, __ret_37, 3, 2, 1, 0); \ - __ret_37; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u8(__p0_38, __p1_38, __p2_38, __p3_38) __extension__ ({ \ - uint8x8_t __s0_38 = __p0_38; \ - uint8x8_t __s2_38 = __p2_38; \ - uint8x8_t __ret_38; \ - __ret_38 = vset_lane_u8(vget_lane_u8(__s2_38, __p3_38), __s0_38, __p1_38); \ - __ret_38; \ -}) -#else -#define vcopy_lane_u8(__p0_39, __p1_39, __p2_39, __p3_39) __extension__ ({ \ - uint8x8_t __s0_39 = __p0_39; \ - uint8x8_t __s2_39 = __p2_39; \ 
- uint8x8_t __rev0_39; __rev0_39 = __builtin_shufflevector(__s0_39, __s0_39, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_39; __rev2_39 = __builtin_shufflevector(__s2_39, __s2_39, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret_39; \ - __ret_39 = __noswap_vset_lane_u8(__noswap_vget_lane_u8(__rev2_39, __p3_39), __rev0_39, __p1_39); \ - __ret_39 = __builtin_shufflevector(__ret_39, __ret_39, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_39; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u32(__p0_40, __p1_40, __p2_40, __p3_40) __extension__ ({ \ - uint32x2_t __s0_40 = __p0_40; \ - uint32x2_t __s2_40 = __p2_40; \ - uint32x2_t __ret_40; \ - __ret_40 = vset_lane_u32(vget_lane_u32(__s2_40, __p3_40), __s0_40, __p1_40); \ - __ret_40; \ -}) -#else -#define vcopy_lane_u32(__p0_41, __p1_41, __p2_41, __p3_41) __extension__ ({ \ - uint32x2_t __s0_41 = __p0_41; \ - uint32x2_t __s2_41 = __p2_41; \ - uint32x2_t __rev0_41; __rev0_41 = __builtin_shufflevector(__s0_41, __s0_41, 1, 0); \ - uint32x2_t __rev2_41; __rev2_41 = __builtin_shufflevector(__s2_41, __s2_41, 1, 0); \ - uint32x2_t __ret_41; \ - __ret_41 = __noswap_vset_lane_u32(__noswap_vget_lane_u32(__rev2_41, __p3_41), __rev0_41, __p1_41); \ - __ret_41 = __builtin_shufflevector(__ret_41, __ret_41, 1, 0); \ - __ret_41; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u64(__p0_42, __p1_42, __p2_42, __p3_42) __extension__ ({ \ - uint64x1_t __s0_42 = __p0_42; \ - uint64x1_t __s2_42 = __p2_42; \ - uint64x1_t __ret_42; \ - __ret_42 = vset_lane_u64(vget_lane_u64(__s2_42, __p3_42), __s0_42, __p1_42); \ - __ret_42; \ -}) -#else -#define vcopy_lane_u64(__p0_43, __p1_43, __p2_43, __p3_43) __extension__ ({ \ - uint64x1_t __s0_43 = __p0_43; \ - uint64x1_t __s2_43 = __p2_43; \ - uint64x1_t __ret_43; \ - __ret_43 = __noswap_vset_lane_u64(__noswap_vget_lane_u64(__s2_43, __p3_43), __s0_43, __p1_43); \ - __ret_43; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u16(__p0_44, __p1_44, __p2_44, __p3_44) __extension__ ({ \ - uint16x4_t __s0_44 = __p0_44; \ - uint16x4_t __s2_44 = __p2_44; \ - uint16x4_t __ret_44; \ - __ret_44 = vset_lane_u16(vget_lane_u16(__s2_44, __p3_44), __s0_44, __p1_44); \ - __ret_44; \ -}) -#else -#define vcopy_lane_u16(__p0_45, __p1_45, __p2_45, __p3_45) __extension__ ({ \ - uint16x4_t __s0_45 = __p0_45; \ - uint16x4_t __s2_45 = __p2_45; \ - uint16x4_t __rev0_45; __rev0_45 = __builtin_shufflevector(__s0_45, __s0_45, 3, 2, 1, 0); \ - uint16x4_t __rev2_45; __rev2_45 = __builtin_shufflevector(__s2_45, __s2_45, 3, 2, 1, 0); \ - uint16x4_t __ret_45; \ - __ret_45 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_45, __p3_45), __rev0_45, __p1_45); \ - __ret_45 = __builtin_shufflevector(__ret_45, __ret_45, 3, 2, 1, 0); \ - __ret_45; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s8(__p0_46, __p1_46, __p2_46, __p3_46) __extension__ ({ \ - int8x8_t __s0_46 = __p0_46; \ - int8x8_t __s2_46 = __p2_46; \ - int8x8_t __ret_46; \ - __ret_46 = vset_lane_s8(vget_lane_s8(__s2_46, __p3_46), __s0_46, __p1_46); \ - __ret_46; \ -}) -#else -#define vcopy_lane_s8(__p0_47, __p1_47, __p2_47, __p3_47) __extension__ ({ \ - int8x8_t __s0_47 = __p0_47; \ - int8x8_t __s2_47 = __p2_47; \ - int8x8_t __rev0_47; __rev0_47 = __builtin_shufflevector(__s0_47, __s0_47, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_47; __rev2_47 = __builtin_shufflevector(__s2_47, __s2_47, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret_47; \ - __ret_47 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_47, __p3_47), __rev0_47, __p1_47); \ - __ret_47 = 
__builtin_shufflevector(__ret_47, __ret_47, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_47; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_f32(__p0_48, __p1_48, __p2_48, __p3_48) __extension__ ({ \ - float32x2_t __s0_48 = __p0_48; \ - float32x2_t __s2_48 = __p2_48; \ - float32x2_t __ret_48; \ - __ret_48 = vset_lane_f32(vget_lane_f32(__s2_48, __p3_48), __s0_48, __p1_48); \ - __ret_48; \ -}) -#else -#define vcopy_lane_f32(__p0_49, __p1_49, __p2_49, __p3_49) __extension__ ({ \ - float32x2_t __s0_49 = __p0_49; \ - float32x2_t __s2_49 = __p2_49; \ - float32x2_t __rev0_49; __rev0_49 = __builtin_shufflevector(__s0_49, __s0_49, 1, 0); \ - float32x2_t __rev2_49; __rev2_49 = __builtin_shufflevector(__s2_49, __s2_49, 1, 0); \ - float32x2_t __ret_49; \ - __ret_49 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_49, __p3_49), __rev0_49, __p1_49); \ - __ret_49 = __builtin_shufflevector(__ret_49, __ret_49, 1, 0); \ - __ret_49; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s32(__p0_50, __p1_50, __p2_50, __p3_50) __extension__ ({ \ - int32x2_t __s0_50 = __p0_50; \ - int32x2_t __s2_50 = __p2_50; \ - int32x2_t __ret_50; \ - __ret_50 = vset_lane_s32(vget_lane_s32(__s2_50, __p3_50), __s0_50, __p1_50); \ - __ret_50; \ -}) +__ai uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 > __p1); + return __ret; +} #else -#define vcopy_lane_s32(__p0_51, __p1_51, __p2_51, __p3_51) __extension__ ({ \ - int32x2_t __s0_51 = __p0_51; \ - int32x2_t __s2_51 = __p2_51; \ - int32x2_t __rev0_51; __rev0_51 = __builtin_shufflevector(__s0_51, __s0_51, 1, 0); \ - int32x2_t __rev2_51; __rev2_51 = __builtin_shufflevector(__s2_51, __s2_51, 1, 0); \ - int32x2_t __ret_51; \ - __ret_51 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_51, __p3_51), __rev0_51, __p1_51); \ - __ret_51 = __builtin_shufflevector(__ret_51, __ret_51, 1, 0); \ - __ret_51; \ -}) +__ai uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s64(__p0_52, __p1_52, __p2_52, __p3_52) __extension__ ({ \ - int64x1_t __s0_52 = __p0_52; \ - int64x1_t __s2_52 = __p2_52; \ - int64x1_t __ret_52; \ - __ret_52 = vset_lane_s64(vget_lane_s64(__s2_52, __p3_52), __s0_52, __p1_52); \ - __ret_52; \ -}) +__ai uint64x1_t vcgt_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 > __p1); + return __ret; +} #else -#define vcopy_lane_s64(__p0_53, __p1_53, __p2_53, __p3_53) __extension__ ({ \ - int64x1_t __s0_53 = __p0_53; \ - int64x1_t __s2_53 = __p2_53; \ - int64x1_t __ret_53; \ - __ret_53 = __noswap_vset_lane_s64(__noswap_vget_lane_s64(__s2_53, __p3_53), __s0_53, __p1_53); \ - __ret_53; \ -}) +__ai uint64x1_t vcgt_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 > __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s16(__p0_54, __p1_54, __p2_54, __p3_54) __extension__ ({ \ - int16x4_t __s0_54 = __p0_54; \ - int16x4_t __s2_54 = __p2_54; \ - int16x4_t __ret_54; \ - __ret_54 = vset_lane_s16(vget_lane_s16(__s2_54, __p3_54), __s0_54, __p1_54); \ - __ret_54; \ -}) +__ai uint64x1_t vcgt_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 > __p1); + 
return __ret; +} #else -#define vcopy_lane_s16(__p0_55, __p1_55, __p2_55, __p3_55) __extension__ ({ \ - int16x4_t __s0_55 = __p0_55; \ - int16x4_t __s2_55 = __p2_55; \ - int16x4_t __rev0_55; __rev0_55 = __builtin_shufflevector(__s0_55, __s0_55, 3, 2, 1, 0); \ - int16x4_t __rev2_55; __rev2_55 = __builtin_shufflevector(__s2_55, __s2_55, 3, 2, 1, 0); \ - int16x4_t __ret_55; \ - __ret_55 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_55, __p3_55), __rev0_55, __p1_55); \ - __ret_55 = __builtin_shufflevector(__ret_55, __ret_55, 3, 2, 1, 0); \ - __ret_55; \ -}) +__ai uint64x1_t vcgt_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 > __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p8(__p0_56, __p1_56, __p2_56, __p3_56) __extension__ ({ \ - poly8x16_t __s0_56 = __p0_56; \ - poly8x16_t __s2_56 = __p2_56; \ - poly8x16_t __ret_56; \ - __ret_56 = vsetq_lane_p8(vgetq_lane_p8(__s2_56, __p3_56), __s0_56, __p1_56); \ - __ret_56; \ -}) +__ai uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 > __p1); + return __ret; +} #else -#define vcopyq_laneq_p8(__p0_57, __p1_57, __p2_57, __p3_57) __extension__ ({ \ - poly8x16_t __s0_57 = __p0_57; \ - poly8x16_t __s2_57 = __p2_57; \ - poly8x16_t __rev0_57; __rev0_57 = __builtin_shufflevector(__s0_57, __s0_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev2_57; __rev2_57 = __builtin_shufflevector(__s2_57, __s2_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret_57; \ - __ret_57 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_57, __p3_57), __rev0_57, __p1_57); \ - __ret_57 = __builtin_shufflevector(__ret_57, __ret_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_57; \ -}) +__ai uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 > __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p16(__p0_58, __p1_58, __p2_58, __p3_58) __extension__ ({ \ - poly16x8_t __s0_58 = __p0_58; \ - poly16x8_t __s2_58 = __p2_58; \ - poly16x8_t __ret_58; \ - __ret_58 = vsetq_lane_p16(vgetq_lane_p16(__s2_58, __p3_58), __s0_58, __p1_58); \ - __ret_58; \ -}) +__ai int64_t vcgtd_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcgtd_s64(__p0, __p1); + return __ret; +} #else -#define vcopyq_laneq_p16(__p0_59, __p1_59, __p2_59, __p3_59) __extension__ ({ \ - poly16x8_t __s0_59 = __p0_59; \ - poly16x8_t __s2_59 = __p2_59; \ - poly16x8_t __rev0_59; __rev0_59 = __builtin_shufflevector(__s0_59, __s0_59, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __rev2_59; __rev2_59 = __builtin_shufflevector(__s2_59, __s2_59, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __ret_59; \ - __ret_59 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_59, __p3_59), __rev0_59, __p1_59); \ - __ret_59 = __builtin_shufflevector(__ret_59, __ret_59, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_59; \ -}) +__ai int64_t vcgtd_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcgtd_s64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u8(__p0_60, __p1_60, __p2_60, __p3_60) __extension__ ({ \ - uint8x16_t __s0_60 = __p0_60; \ - uint8x16_t __s2_60 = __p2_60; \ - uint8x16_t __ret_60; \ - __ret_60 = vsetq_lane_u8(vgetq_lane_u8(__s2_60, __p3_60), __s0_60, __p1_60); \ - __ret_60; \ -}) +__ai uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + 
__ret = (uint64_t) __builtin_neon_vcgtd_u64(__p0, __p1); + return __ret; +} #else -#define vcopyq_laneq_u8(__p0_61, __p1_61, __p2_61, __p3_61) __extension__ ({ \ - uint8x16_t __s0_61 = __p0_61; \ - uint8x16_t __s2_61 = __p2_61; \ - uint8x16_t __rev0_61; __rev0_61 = __builtin_shufflevector(__s0_61, __s0_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_61; __rev2_61 = __builtin_shufflevector(__s2_61, __s2_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_61; \ - __ret_61 = __noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_61, __p3_61), __rev0_61, __p1_61); \ - __ret_61 = __builtin_shufflevector(__ret_61, __ret_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_61; \ -}) +__ai uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcgtd_u64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u32(__p0_62, __p1_62, __p2_62, __p3_62) __extension__ ({ \ - uint32x4_t __s0_62 = __p0_62; \ - uint32x4_t __s2_62 = __p2_62; \ - uint32x4_t __ret_62; \ - __ret_62 = vsetq_lane_u32(vgetq_lane_u32(__s2_62, __p3_62), __s0_62, __p1_62); \ - __ret_62; \ -}) +__ai uint64_t vcgtd_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcgtd_f64(__p0, __p1); + return __ret; +} #else -#define vcopyq_laneq_u32(__p0_63, __p1_63, __p2_63, __p3_63) __extension__ ({ \ - uint32x4_t __s0_63 = __p0_63; \ - uint32x4_t __s2_63 = __p2_63; \ - uint32x4_t __rev0_63; __rev0_63 = __builtin_shufflevector(__s0_63, __s0_63, 3, 2, 1, 0); \ - uint32x4_t __rev2_63; __rev2_63 = __builtin_shufflevector(__s2_63, __s2_63, 3, 2, 1, 0); \ - uint32x4_t __ret_63; \ - __ret_63 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_63, __p3_63), __rev0_63, __p1_63); \ - __ret_63 = __builtin_shufflevector(__ret_63, __ret_63, 3, 2, 1, 0); \ - __ret_63; \ -}) +__ai uint64_t vcgtd_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcgtd_f64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u64(__p0_64, __p1_64, __p2_64, __p3_64) __extension__ ({ \ - uint64x2_t __s0_64 = __p0_64; \ - uint64x2_t __s2_64 = __p2_64; \ - uint64x2_t __ret_64; \ - __ret_64 = vsetq_lane_u64(vgetq_lane_u64(__s2_64, __p3_64), __s0_64, __p1_64); \ - __ret_64; \ -}) +__ai uint32_t vcgts_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcgts_f32(__p0, __p1); + return __ret; +} #else -#define vcopyq_laneq_u64(__p0_65, __p1_65, __p2_65, __p3_65) __extension__ ({ \ - uint64x2_t __s0_65 = __p0_65; \ - uint64x2_t __s2_65 = __p2_65; \ - uint64x2_t __rev0_65; __rev0_65 = __builtin_shufflevector(__s0_65, __s0_65, 1, 0); \ - uint64x2_t __rev2_65; __rev2_65 = __builtin_shufflevector(__s2_65, __s2_65, 1, 0); \ - uint64x2_t __ret_65; \ - __ret_65 = __noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_65, __p3_65), __rev0_65, __p1_65); \ - __ret_65 = __builtin_shufflevector(__ret_65, __ret_65, 1, 0); \ - __ret_65; \ -}) +__ai uint32_t vcgts_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcgts_f32(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u16(__p0_66, __p1_66, __p2_66, __p3_66) __extension__ ({ \ - uint16x8_t __s0_66 = __p0_66; \ - uint16x8_t __s2_66 = __p2_66; \ - uint16x8_t __ret_66; \ - __ret_66 = vsetq_lane_u16(vgetq_lane_u16(__s2_66, __p3_66), __s0_66, __p1_66); \ - __ret_66; \ 
-}) +__ai uint8x16_t vcgtzq_s8(int8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 48); + return __ret; +} #else -#define vcopyq_laneq_u16(__p0_67, __p1_67, __p2_67, __p3_67) __extension__ ({ \ - uint16x8_t __s0_67 = __p0_67; \ - uint16x8_t __s2_67 = __p2_67; \ - uint16x8_t __rev0_67; __rev0_67 = __builtin_shufflevector(__s0_67, __s0_67, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_67; __rev2_67 = __builtin_shufflevector(__s2_67, __s2_67, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_67; \ - __ret_67 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_67, __p3_67), __rev0_67, __p1_67); \ - __ret_67 = __builtin_shufflevector(__ret_67, __ret_67, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_67; \ -}) +__ai uint8x16_t vcgtzq_s8(int8x16_t __p0) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s8(__p0_68, __p1_68, __p2_68, __p3_68) __extension__ ({ \ - int8x16_t __s0_68 = __p0_68; \ - int8x16_t __s2_68 = __p2_68; \ - int8x16_t __ret_68; \ - __ret_68 = vsetq_lane_s8(vgetq_lane_s8(__s2_68, __p3_68), __s0_68, __p1_68); \ - __ret_68; \ -}) +__ai uint64x2_t vcgtzq_f64(float64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 51); + return __ret; +} #else -#define vcopyq_laneq_s8(__p0_69, __p1_69, __p2_69, __p3_69) __extension__ ({ \ - int8x16_t __s0_69 = __p0_69; \ - int8x16_t __s2_69 = __p2_69; \ - int8x16_t __rev0_69; __rev0_69 = __builtin_shufflevector(__s0_69, __s0_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_69; __rev2_69 = __builtin_shufflevector(__s2_69, __s2_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_69; \ - __ret_69 = __noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_69, __p3_69), __rev0_69, __p1_69); \ - __ret_69 = __builtin_shufflevector(__ret_69, __ret_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_69; \ -}) +__ai uint64x2_t vcgtzq_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_f32(__p0_70, __p1_70, __p2_70, __p3_70) __extension__ ({ \ - float32x4_t __s0_70 = __p0_70; \ - float32x4_t __s2_70 = __p2_70; \ - float32x4_t __ret_70; \ - __ret_70 = vsetq_lane_f32(vgetq_lane_f32(__s2_70, __p3_70), __s0_70, __p1_70); \ - __ret_70; \ -}) +__ai uint32x4_t vcgtzq_f32(float32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 50); + return __ret; +} #else -#define vcopyq_laneq_f32(__p0_71, __p1_71, __p2_71, __p3_71) __extension__ ({ \ - float32x4_t __s0_71 = __p0_71; \ - float32x4_t __s2_71 = __p2_71; \ - float32x4_t __rev0_71; __rev0_71 = __builtin_shufflevector(__s0_71, __s0_71, 3, 2, 1, 0); \ - float32x4_t __rev2_71; __rev2_71 = __builtin_shufflevector(__s2_71, __s2_71, 3, 2, 1, 0); \ - float32x4_t __ret_71; \ - __ret_71 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_71, __p3_71), __rev0_71, __p1_71); \ - __ret_71 = __builtin_shufflevector(__ret_71, __ret_71, 
3, 2, 1, 0); \ - __ret_71; \ -}) +__ai uint32x4_t vcgtzq_f32(float32x4_t __p0) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s32(__p0_72, __p1_72, __p2_72, __p3_72) __extension__ ({ \ - int32x4_t __s0_72 = __p0_72; \ - int32x4_t __s2_72 = __p2_72; \ - int32x4_t __ret_72; \ - __ret_72 = vsetq_lane_s32(vgetq_lane_s32(__s2_72, __p3_72), __s0_72, __p1_72); \ - __ret_72; \ -}) +__ai uint32x4_t vcgtzq_s32(int32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 50); + return __ret; +} #else -#define vcopyq_laneq_s32(__p0_73, __p1_73, __p2_73, __p3_73) __extension__ ({ \ - int32x4_t __s0_73 = __p0_73; \ - int32x4_t __s2_73 = __p2_73; \ - int32x4_t __rev0_73; __rev0_73 = __builtin_shufflevector(__s0_73, __s0_73, 3, 2, 1, 0); \ - int32x4_t __rev2_73; __rev2_73 = __builtin_shufflevector(__s2_73, __s2_73, 3, 2, 1, 0); \ - int32x4_t __ret_73; \ - __ret_73 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_73, __p3_73), __rev0_73, __p1_73); \ - __ret_73 = __builtin_shufflevector(__ret_73, __ret_73, 3, 2, 1, 0); \ - __ret_73; \ -}) +__ai uint32x4_t vcgtzq_s32(int32x4_t __p0) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s64(__p0_74, __p1_74, __p2_74, __p3_74) __extension__ ({ \ - int64x2_t __s0_74 = __p0_74; \ - int64x2_t __s2_74 = __p2_74; \ - int64x2_t __ret_74; \ - __ret_74 = vsetq_lane_s64(vgetq_lane_s64(__s2_74, __p3_74), __s0_74, __p1_74); \ - __ret_74; \ -}) +__ai uint64x2_t vcgtzq_s64(int64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 51); + return __ret; +} #else -#define vcopyq_laneq_s64(__p0_75, __p1_75, __p2_75, __p3_75) __extension__ ({ \ - int64x2_t __s0_75 = __p0_75; \ - int64x2_t __s2_75 = __p2_75; \ - int64x2_t __rev0_75; __rev0_75 = __builtin_shufflevector(__s0_75, __s0_75, 1, 0); \ - int64x2_t __rev2_75; __rev2_75 = __builtin_shufflevector(__s2_75, __s2_75, 1, 0); \ - int64x2_t __ret_75; \ - __ret_75 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_75, __p3_75), __rev0_75, __p1_75); \ - __ret_75 = __builtin_shufflevector(__ret_75, __ret_75, 1, 0); \ - __ret_75; \ -}) +__ai uint64x2_t vcgtzq_s64(int64x2_t __p0) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s16(__p0_76, __p1_76, __p2_76, __p3_76) __extension__ ({ \ - int16x8_t __s0_76 = __p0_76; \ - int16x8_t __s2_76 = __p2_76; \ - int16x8_t __ret_76; \ - __ret_76 = vsetq_lane_s16(vgetq_lane_s16(__s2_76, __p3_76), __s0_76, __p1_76); \ - __ret_76; \ -}) +__ai uint16x8_t vcgtzq_s16(int16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 49); + return __ret; +} #else -#define vcopyq_laneq_s16(__p0_77, __p1_77, __p2_77, __p3_77) __extension__ ({ \ - int16x8_t __s0_77 = __p0_77; \ - int16x8_t __s2_77 = __p2_77; \ - int16x8_t __rev0_77; 
__rev0_77 = __builtin_shufflevector(__s0_77, __s0_77, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_77; __rev2_77 = __builtin_shufflevector(__s2_77, __s2_77, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_77; \ - __ret_77 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_77, __p3_77), __rev0_77, __p1_77); \ - __ret_77 = __builtin_shufflevector(__ret_77, __ret_77, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_77; \ -}) +__ai uint16x8_t vcgtzq_s16(int16x8_t __p0) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p8(__p0_78, __p1_78, __p2_78, __p3_78) __extension__ ({ \ - poly8x8_t __s0_78 = __p0_78; \ - poly8x16_t __s2_78 = __p2_78; \ - poly8x8_t __ret_78; \ - __ret_78 = vset_lane_p8(vgetq_lane_p8(__s2_78, __p3_78), __s0_78, __p1_78); \ - __ret_78; \ -}) +__ai uint8x8_t vcgtz_s8(int8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 16); + return __ret; +} #else -#define vcopy_laneq_p8(__p0_79, __p1_79, __p2_79, __p3_79) __extension__ ({ \ - poly8x8_t __s0_79 = __p0_79; \ - poly8x16_t __s2_79 = __p2_79; \ - poly8x8_t __rev0_79; __rev0_79 = __builtin_shufflevector(__s0_79, __s0_79, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev2_79; __rev2_79 = __builtin_shufflevector(__s2_79, __s2_79, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret_79; \ - __ret_79 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_79, __p3_79), __rev0_79, __p1_79); \ - __ret_79 = __builtin_shufflevector(__ret_79, __ret_79, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_79; \ -}) +__ai uint8x8_t vcgtz_s8(int8x8_t __p0) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p16(__p0_80, __p1_80, __p2_80, __p3_80) __extension__ ({ \ - poly16x4_t __s0_80 = __p0_80; \ - poly16x8_t __s2_80 = __p2_80; \ - poly16x4_t __ret_80; \ - __ret_80 = vset_lane_p16(vgetq_lane_p16(__s2_80, __p3_80), __s0_80, __p1_80); \ - __ret_80; \ -}) +__ai uint64x1_t vcgtz_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); + return __ret; +} #else -#define vcopy_laneq_p16(__p0_81, __p1_81, __p2_81, __p3_81) __extension__ ({ \ - poly16x4_t __s0_81 = __p0_81; \ - poly16x8_t __s2_81 = __p2_81; \ - poly16x4_t __rev0_81; __rev0_81 = __builtin_shufflevector(__s0_81, __s0_81, 3, 2, 1, 0); \ - poly16x8_t __rev2_81; __rev2_81 = __builtin_shufflevector(__s2_81, __s2_81, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x4_t __ret_81; \ - __ret_81 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_81, __p3_81), __rev0_81, __p1_81); \ - __ret_81 = __builtin_shufflevector(__ret_81, __ret_81, 3, 2, 1, 0); \ - __ret_81; \ -}) +__ai uint64x1_t vcgtz_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u8(__p0_82, __p1_82, __p2_82, __p3_82) __extension__ ({ \ - uint8x8_t __s0_82 = __p0_82; \ - uint8x16_t __s2_82 = __p2_82; \ - uint8x8_t __ret_82; \ - __ret_82 = vset_lane_u8(vgetq_lane_u8(__s2_82, __p3_82), 
__s0_82, __p1_82); \ - __ret_82; \ -}) +__ai uint32x2_t vcgtz_f32(float32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 18); + return __ret; +} #else -#define vcopy_laneq_u8(__p0_83, __p1_83, __p2_83, __p3_83) __extension__ ({ \ - uint8x8_t __s0_83 = __p0_83; \ - uint8x16_t __s2_83 = __p2_83; \ - uint8x8_t __rev0_83; __rev0_83 = __builtin_shufflevector(__s0_83, __s0_83, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_83; __rev2_83 = __builtin_shufflevector(__s2_83, __s2_83, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret_83; \ - __ret_83 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_83, __p3_83), __rev0_83, __p1_83); \ - __ret_83 = __builtin_shufflevector(__ret_83, __ret_83, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_83; \ -}) +__ai uint32x2_t vcgtz_f32(float32x2_t __p0) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 18); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u32(__p0_84, __p1_84, __p2_84, __p3_84) __extension__ ({ \ - uint32x2_t __s0_84 = __p0_84; \ - uint32x4_t __s2_84 = __p2_84; \ - uint32x2_t __ret_84; \ - __ret_84 = vset_lane_u32(vgetq_lane_u32(__s2_84, __p3_84), __s0_84, __p1_84); \ - __ret_84; \ -}) +__ai uint32x2_t vcgtz_s32(int32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 18); + return __ret; +} #else -#define vcopy_laneq_u32(__p0_85, __p1_85, __p2_85, __p3_85) __extension__ ({ \ - uint32x2_t __s0_85 = __p0_85; \ - uint32x4_t __s2_85 = __p2_85; \ - uint32x2_t __rev0_85; __rev0_85 = __builtin_shufflevector(__s0_85, __s0_85, 1, 0); \ - uint32x4_t __rev2_85; __rev2_85 = __builtin_shufflevector(__s2_85, __s2_85, 3, 2, 1, 0); \ - uint32x2_t __ret_85; \ - __ret_85 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_85, __p3_85), __rev0_85, __p1_85); \ - __ret_85 = __builtin_shufflevector(__ret_85, __ret_85, 1, 0); \ - __ret_85; \ -}) +__ai uint32x2_t vcgtz_s32(int32x2_t __p0) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 18); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u64(__p0_86, __p1_86, __p2_86, __p3_86) __extension__ ({ \ - uint64x1_t __s0_86 = __p0_86; \ - uint64x2_t __s2_86 = __p2_86; \ - uint64x1_t __ret_86; \ - __ret_86 = vset_lane_u64(vgetq_lane_u64(__s2_86, __p3_86), __s0_86, __p1_86); \ - __ret_86; \ -}) +__ai uint64x1_t vcgtz_s64(int64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); + return __ret; +} #else -#define vcopy_laneq_u64(__p0_87, __p1_87, __p2_87, __p3_87) __extension__ ({ \ - uint64x1_t __s0_87 = __p0_87; \ - uint64x2_t __s2_87 = __p2_87; \ - uint64x2_t __rev2_87; __rev2_87 = __builtin_shufflevector(__s2_87, __s2_87, 1, 0); \ - uint64x1_t __ret_87; \ - __ret_87 = __noswap_vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_87, __p3_87), __s0_87, __p1_87); \ - __ret_87; \ -}) +__ai uint64x1_t vcgtz_s64(int64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x4_t vcgtz_s16(int16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 17); + return __ret; 
+} +#else +__ai uint16x4_t vcgtz_s16(int16x4_t __p0) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 17); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u16(__p0_88, __p1_88, __p2_88, __p3_88) __extension__ ({ \ - uint16x4_t __s0_88 = __p0_88; \ - uint16x8_t __s2_88 = __p2_88; \ - uint16x4_t __ret_88; \ - __ret_88 = vset_lane_u16(vgetq_lane_u16(__s2_88, __p3_88), __s0_88, __p1_88); \ - __ret_88; \ -}) +__ai int64_t vcgtzd_s64(int64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcgtzd_s64(__p0); + return __ret; +} #else -#define vcopy_laneq_u16(__p0_89, __p1_89, __p2_89, __p3_89) __extension__ ({ \ - uint16x4_t __s0_89 = __p0_89; \ - uint16x8_t __s2_89 = __p2_89; \ - uint16x4_t __rev0_89; __rev0_89 = __builtin_shufflevector(__s0_89, __s0_89, 3, 2, 1, 0); \ - uint16x8_t __rev2_89; __rev2_89 = __builtin_shufflevector(__s2_89, __s2_89, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret_89; \ - __ret_89 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_89, __p3_89), __rev0_89, __p1_89); \ - __ret_89 = __builtin_shufflevector(__ret_89, __ret_89, 3, 2, 1, 0); \ - __ret_89; \ -}) +__ai int64_t vcgtzd_s64(int64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcgtzd_s64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s8(__p0_90, __p1_90, __p2_90, __p3_90) __extension__ ({ \ - int8x8_t __s0_90 = __p0_90; \ - int8x16_t __s2_90 = __p2_90; \ - int8x8_t __ret_90; \ - __ret_90 = vset_lane_s8(vgetq_lane_s8(__s2_90, __p3_90), __s0_90, __p1_90); \ - __ret_90; \ -}) +__ai uint64_t vcgtzd_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcgtzd_f64(__p0); + return __ret; +} #else -#define vcopy_laneq_s8(__p0_91, __p1_91, __p2_91, __p3_91) __extension__ ({ \ - int8x8_t __s0_91 = __p0_91; \ - int8x16_t __s2_91 = __p2_91; \ - int8x8_t __rev0_91; __rev0_91 = __builtin_shufflevector(__s0_91, __s0_91, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_91; __rev2_91 = __builtin_shufflevector(__s2_91, __s2_91, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret_91; \ - __ret_91 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_91, __p3_91), __rev0_91, __p1_91); \ - __ret_91 = __builtin_shufflevector(__ret_91, __ret_91, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_91; \ -}) +__ai uint64_t vcgtzd_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcgtzd_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_f32(__p0_92, __p1_92, __p2_92, __p3_92) __extension__ ({ \ - float32x2_t __s0_92 = __p0_92; \ - float32x4_t __s2_92 = __p2_92; \ - float32x2_t __ret_92; \ - __ret_92 = vset_lane_f32(vgetq_lane_f32(__s2_92, __p3_92), __s0_92, __p1_92); \ - __ret_92; \ -}) +__ai uint32_t vcgtzs_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcgtzs_f32(__p0); + return __ret; +} #else -#define vcopy_laneq_f32(__p0_93, __p1_93, __p2_93, __p3_93) __extension__ ({ \ - float32x2_t __s0_93 = __p0_93; \ - float32x4_t __s2_93 = __p2_93; \ - float32x2_t __rev0_93; __rev0_93 = __builtin_shufflevector(__s0_93, __s0_93, 1, 0); \ - float32x4_t __rev2_93; __rev2_93 = __builtin_shufflevector(__s2_93, __s2_93, 3, 2, 1, 0); \ - float32x2_t __ret_93; \ - __ret_93 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_93, __p3_93), __rev0_93, __p1_93); \ - __ret_93 = 
__builtin_shufflevector(__ret_93, __ret_93, 1, 0); \ - __ret_93; \ -}) +__ai uint32_t vcgtzs_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcgtzs_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s32(__p0_94, __p1_94, __p2_94, __p3_94) __extension__ ({ \ - int32x2_t __s0_94 = __p0_94; \ - int32x4_t __s2_94 = __p2_94; \ - int32x2_t __ret_94; \ - __ret_94 = vset_lane_s32(vgetq_lane_s32(__s2_94, __p3_94), __s0_94, __p1_94); \ - __ret_94; \ -}) +__ai uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 <= __p1); + return __ret; +} #else -#define vcopy_laneq_s32(__p0_95, __p1_95, __p2_95, __p3_95) __extension__ ({ \ - int32x2_t __s0_95 = __p0_95; \ - int32x4_t __s2_95 = __p2_95; \ - int32x2_t __rev0_95; __rev0_95 = __builtin_shufflevector(__s0_95, __s0_95, 1, 0); \ - int32x4_t __rev2_95; __rev2_95 = __builtin_shufflevector(__s2_95, __s2_95, 3, 2, 1, 0); \ - int32x2_t __ret_95; \ - __ret_95 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_95, __p3_95), __rev0_95, __p1_95); \ - __ret_95 = __builtin_shufflevector(__ret_95, __ret_95, 1, 0); \ - __ret_95; \ -}) +__ai uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s64(__p0_96, __p1_96, __p2_96, __p3_96) __extension__ ({ \ - int64x1_t __s0_96 = __p0_96; \ - int64x2_t __s2_96 = __p2_96; \ - int64x1_t __ret_96; \ - __ret_96 = vset_lane_s64(vgetq_lane_s64(__s2_96, __p3_96), __s0_96, __p1_96); \ - __ret_96; \ -}) +__ai uint64x2_t vcleq_f64(float64x2_t __p0, float64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 <= __p1); + return __ret; +} #else -#define vcopy_laneq_s64(__p0_97, __p1_97, __p2_97, __p3_97) __extension__ ({ \ - int64x1_t __s0_97 = __p0_97; \ - int64x2_t __s2_97 = __p2_97; \ - int64x2_t __rev2_97; __rev2_97 = __builtin_shufflevector(__s2_97, __s2_97, 1, 0); \ - int64x1_t __ret_97; \ - __ret_97 = __noswap_vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_97, __p3_97), __s0_97, __p1_97); \ - __ret_97; \ -}) +__ai uint64x2_t vcleq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s16(__p0_98, __p1_98, __p2_98, __p3_98) __extension__ ({ \ - int16x4_t __s0_98 = __p0_98; \ - int16x8_t __s2_98 = __p2_98; \ - int16x4_t __ret_98; \ - __ret_98 = vset_lane_s16(vgetq_lane_s16(__s2_98, __p3_98), __s0_98, __p1_98); \ - __ret_98; \ -}) +__ai uint64x2_t vcleq_s64(int64x2_t __p0, int64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 <= __p1); + return __ret; +} #else -#define vcopy_laneq_s16(__p0_99, __p1_99, __p2_99, __p3_99) __extension__ ({ \ - int16x4_t __s0_99 = __p0_99; \ - int16x8_t __s2_99 = __p2_99; \ - int16x4_t __rev0_99; __rev0_99 = __builtin_shufflevector(__s0_99, __s0_99, 3, 2, 1, 0); \ - int16x8_t __rev2_99; __rev2_99 = __builtin_shufflevector(__s2_99, __s2_99, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_99; \ - __ret_99 = 
__noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_99, __p3_99), __rev0_99, __p1_99); \ - __ret_99 = __builtin_shufflevector(__ret_99, __ret_99, 3, 2, 1, 0); \ - __ret_99; \ -}) +__ai uint64x2_t vcleq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x1_t vcreate_p64(uint64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); +__ai uint64x1_t vcle_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } #else -__ai poly64x1_t vcreate_p64(uint64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); +__ai uint64x1_t vcle_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vcreate_f64(uint64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai uint64x1_t vcle_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } #else -__ai float64x1_t vcreate_f64(uint64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); +__ai uint64x1_t vcle_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vcvts_f32_s32(int32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvts_f32_s32(__p0); +__ai uint64x1_t vcle_s64(int64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } #else -__ai float32_t vcvts_f32_s32(int32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvts_f32_s32(__p0); +__ai uint64x1_t vcle_s64(int64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vcvts_f32_u32(uint32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvts_f32_u32(__p0); +__ai uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); return __ret; } #else -__ai float32_t vcvts_f32_u32(uint32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvts_f32_u32(__p0); +__ai uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vcvt_f32_f64(float64x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__p0, 9); +__ai int64_t vcled_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcled_s64(__p0, __p1); return __ret; } #else -__ai float32x2_t vcvt_f32_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -__ai float32x2_t __noswap_vcvt_f32_f64(float64x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__p0, 9); +__ai int64_t vcled_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) 
__builtin_neon_vcled_s64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64_t vcvtd_f64_s64(int64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vcvtd_f64_s64(__p0); +__ai uint64_t vcled_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcled_f64(__p0, __p1); return __ret; } #else -__ai float64_t vcvtd_f64_s64(int64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vcvtd_f64_s64(__p0); +__ai uint64_t vcled_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcled_f64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64_t vcvtd_f64_u64(uint64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vcvtd_f64_u64(__p0); +__ai uint32_t vcles_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcles_f32(__p0, __p1); return __ret; } #else -__ai float64_t vcvtd_f64_u64(uint64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vcvtd_f64_u64(__p0); +__ai uint32_t vcles_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcles_f32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__p0, 51); +__ai uint8x16_t vclezq_s8(int8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 48); return __ret; } #else -__ai float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint8x16_t vclezq_s8(int8x16_t __p0) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vcvtq_f64_s64(int64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__p0, 35); +__ai uint64x2_t vclezq_f64(float64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai float64x2_t vcvtq_f64_s64(int64x2_t __p0) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__rev0, 35); +__ai uint64x2_t vclezq_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vcvt_f64_u64(uint64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 19); +__ai uint32x4_t vclezq_f32(float32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai float64x1_t vcvt_f64_u64(uint64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 19); +__ai uint32x4_t 
vclezq_f32(float32x4_t __p0) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vcvt_f64_s64(int64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 3); +__ai uint32x4_t vclezq_s32(int32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai float64x1_t vcvt_f64_s64(int64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 3); +__ai uint32x4_t vclezq_s32(int32x4_t __p0) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vcvt_f64_f32(float32x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__p0, 42); +__ai uint64x2_t vclezq_s64(int64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai float64x2_t vcvt_f64_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__rev0, 42); +__ai uint64x2_t vclezq_s64(int64x2_t __p0) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai float64x2_t __noswap_vcvt_f64_f32(float32x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__p0, 42); +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x8_t vclezq_s16(int16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 49); + return __ret; +} +#else +__ai uint16x8_t vclezq_s16(int16x8_t __p0) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float16x8_t vcvt_high_f16_f32(float16x4_t __p0, float32x4_t __p1) { - float16x8_t __ret; - __ret = vcombine_f16(__p0, vcvt_f16_f32(__p1)); +__ai uint8x8_t vclez_s8(int8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vclez_v((int8x8_t)__p0, 16); return __ret; } #else -__ai float16x8_t vcvt_high_f16_f32(float16x4_t __p0, float32x4_t __p1) { - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x8_t __ret; - __ret = __noswap_vcombine_f16(__rev0, __noswap_vcvt_f16_f32(__rev1)); +__ai uint8x8_t vclez_s8(int8x8_t __p0) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t 
vcvt_high_f32_f16(float16x8_t __p0) { - float32x4_t __ret; - __ret = vcvt_f32_f16(vget_high_f16(__p0)); +__ai uint64x1_t vclez_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); return __ret; } #else -__ai float32x4_t vcvt_high_f32_f16(float16x8_t __p0) { - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float32x4_t __ret; - __ret = __noswap_vcvt_f32_f16(__noswap_vget_high_f16(__rev0)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai uint64x1_t vclez_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vcvt_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { - float32x4_t __ret; - __ret = vcombine_f32(__p0, vcvt_f32_f64(__p1)); +__ai uint32x2_t vclez_f32(float32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__p0, 18); return __ret; } #else -__ai float32x4_t vcvt_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { +__ai uint32x2_t vclez_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x4_t __ret; - __ret = __noswap_vcombine_f32(__rev0, __noswap_vcvt_f32_f64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 18); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { - float64x2_t __ret; - __ret = vcvt_f64_f32(vget_high_f32(__p0)); +__ai uint32x2_t vclez_s32(int32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__p0, 18); return __ret; } #else -__ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float64x2_t __ret; - __ret = __noswap_vcvt_f64_f32(__noswap_vget_high_f32(__rev0)); +__ai uint32x2_t vclez_s32(int32x2_t __p0) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vcvts_n_f32_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vcvts_n_f32_u32(__s0, __p1); \ - __ret; \ -}) +__ai uint64x1_t vclez_s64(int64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); + return __ret; +} #else -#define vcvts_n_f32_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vcvts_n_f32_u32(__s0, __p1); \ - __ret; \ -}) +__ai uint64x1_t vclez_s64(int64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvts_n_f32_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vcvts_n_f32_s32(__s0, __p1); \ - __ret; \ -}) +__ai uint16x4_t vclez_s16(int16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__p0, 17); + return __ret; +} #else -#define vcvts_n_f32_s32(__p0, __p1) 
__extension__ ({ \ - int32_t __s0 = __p0; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vcvts_n_f32_s32(__s0, __p1); \ - __ret; \ -}) +__ai uint16x4_t vclez_s16(int16x4_t __p0) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 17); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__s0, __p1, 51); \ - __ret; \ -}) +__ai int64_t vclezd_s64(int64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vclezd_s64(__p0); + return __ret; +} #else -#define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai int64_t vclezd_s64(int64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vclezd_s64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvtq_n_f64_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__s0, __p1, 35); \ - __ret; \ -}) +__ai uint64_t vclezd_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vclezd_f64(__p0); + return __ret; +} #else -#define vcvtq_n_f64_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai uint64_t vclezd_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vclezd_f64(__p0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32_t vclezs_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vclezs_f32(__p0); + return __ret; +} +#else +__ai uint32_t vclezs_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vclezs_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvt_n_f64_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) +__ai uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 < __p1); + return __ret; +} #else -#define vcvt_n_f64_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) +__ai uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvt_n_f64_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = (float64x1_t) 
__builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) +__ai uint64x2_t vcltq_f64(float64x2_t __p0, float64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 < __p1); + return __ret; +} #else -#define vcvt_n_f64_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) +__ai uint64x2_t vcltq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvtd_n_f64_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vcvtd_n_f64_u64(__s0, __p1); \ - __ret; \ -}) +__ai uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0 < __p1); + return __ret; +} #else -#define vcvtd_n_f64_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vcvtd_n_f64_u64(__s0, __p1); \ - __ret; \ -}) +__ai uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t)(__rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvtd_n_f64_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vcvtd_n_f64_s64(__s0, __p1); \ - __ret; \ -}) +__ai uint64x1_t vclt_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 < __p1); + return __ret; +} #else -#define vcvtd_n_f64_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vcvtd_n_f64_s64(__s0, __p1); \ - __ret; \ -}) +__ai uint64x1_t vclt_u64(uint64x1_t __p0, uint64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 < __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvts_n_s32_f32(__p0, __p1) __extension__ ({ \ - float32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvts_n_s32_f32(__s0, __p1); \ - __ret; \ -}) +__ai uint64x1_t vclt_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 < __p1); + return __ret; +} #else -#define vcvts_n_s32_f32(__p0, __p1) __extension__ ({ \ - float32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvts_n_s32_f32(__s0, __p1); \ - __ret; \ -}) +__ai uint64x1_t vclt_f64(float64x1_t __p0, float64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 < __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vcvtq_n_s64_v((int8x16_t)__s0, __p1, 35); \ - __ret; \ -}) +__ai uint64x1_t vclt_s64(int64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 < __p1); + return __ret; +} #else -#define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 
0); \ - int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vcvtq_n_s64_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai uint64x1_t vclt_s64(int64x1_t __p0, int64x1_t __p1) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0 < __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvt_n_s64_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vcvt_n_s64_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) +__ai uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); + return __ret; +} #else -#define vcvt_n_s64_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vcvt_n_s64_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) +__ai uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvtd_n_s64_f64(__p0, __p1) __extension__ ({ \ - float64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvtd_n_s64_f64(__s0, __p1); \ - __ret; \ -}) +__ai int64_t vcltd_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcltd_s64(__p0, __p1); + return __ret; +} #else -#define vcvtd_n_s64_f64(__p0, __p1) __extension__ ({ \ - float64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvtd_n_s64_f64(__s0, __p1); \ - __ret; \ -}) +__ai int64_t vcltd_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcltd_s64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvts_n_u32_f32(__p0, __p1) __extension__ ({ \ - float32_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vcvts_n_u32_f32(__s0, __p1); \ - __ret; \ -}) +__ai uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcltd_f64(__p0, __p1); + return __ret; +} #else -#define vcvts_n_u32_f32(__p0, __p1) __extension__ ({ \ - float32_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vcvts_n_u32_f32(__s0, __p1); \ - __ret; \ -}) +__ai uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcltd_f64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vcvtq_n_u64_v((int8x16_t)__s0, __p1, 51); \ - __ret; \ -}) +__ai uint32_t vclts_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vclts_f32(__p0, __p1); + return __ret; +} #else -#define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vcvtq_n_u64_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai uint32_t vclts_f32(float32_t __p0, float32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vclts_f32(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvt_n_u64_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) 
__builtin_neon_vcvt_n_u64_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) +__ai uint8x16_t vcltzq_s8(int8x16_t __p0) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 48); + return __ret; +} #else -#define vcvt_n_u64_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vcvt_n_u64_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) +__ai uint8x16_t vcltzq_s8(int8x16_t __p0) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vcvtd_n_u64_f64(__p0, __p1) __extension__ ({ \ - float64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvtd_n_u64_f64(__s0, __p1); \ - __ret; \ -}) +__ai uint64x2_t vcltzq_f64(float64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 51); + return __ret; +} #else -#define vcvtd_n_u64_f64(__p0, __p1) __extension__ ({ \ - float64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvtd_n_u64_f64(__s0, __p1); \ - __ret; \ -}) +__ai uint64x2_t vcltzq_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vcvts_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvts_s32_f32(__p0); +__ai uint32x4_t vcltzq_f32(float32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai int32_t vcvts_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvts_s32_f32(__p0); +__ai uint32x4_t vcltzq_f32(float32x4_t __p0) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcvtd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtd_s64_f64(__p0); +__ai uint32x4_t vcltzq_s32(int32x4_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 50); return __ret; } #else -__ai int64_t vcvtd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtd_s64_f64(__p0); +__ai uint32x4_t vcltzq_s32(int32x4_t __p0) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 50); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vcvtq_s64_f64(float64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtq_s64_v((int8x16_t)__p0, 35); +__ai uint64x2_t vcltzq_s64(int64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 51); return __ret; } #else -__ai int64x2_t vcvtq_s64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 
0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtq_s64_v((int8x16_t)__rev0, 35); +__ai uint64x2_t vcltzq_s64(int64x2_t __p0) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vcvt_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvt_s64_v((int8x8_t)__p0, 3); +__ai uint16x8_t vcltzq_s16(int16x8_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 49); return __ret; } #else -__ai int64x1_t vcvt_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvt_s64_v((int8x8_t)__p0, 3); +__ai uint16x8_t vcltzq_s16(int16x8_t __p0) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcvts_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvts_u32_f32(__p0); +__ai uint8x8_t vcltz_s8(int8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 16); return __ret; } #else -__ai uint32_t vcvts_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvts_u32_f32(__p0); +__ai uint8x8_t vcltz_s8(int8x8_t __p0) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcvtd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtd_u64_f64(__p0); +__ai uint64x1_t vcltz_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); return __ret; } #else -__ai uint64_t vcvtd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtd_u64_f64(__p0); +__ai uint64x1_t vcltz_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtq_u64_v((int8x16_t)__p0, 51); +__ai uint32x2_t vcltz_f32(float32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 18); return __ret; } #else -__ai uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtq_u64_v((int8x16_t)__rev0, 51); +__ai uint32x2_t vcltz_f32(float32x2_t __p0) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vcvt_u64_f64(float64x1_t __p0) { +__ai uint32x2_t vcltz_s32(int32x2_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 18); + return __ret; +} +#else +__ai uint32x2_t 
vcltz_s32(int32x2_t __p0) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __ret; + __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 18); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint64x1_t vcltz_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvt_u64_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); return __ret; } #else -__ai uint64x1_t vcvt_u64_f64(float64x1_t __p0) { +__ai uint64x1_t vcltz_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvt_u64_v((int8x8_t)__p0, 19); + __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vcvtas_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtas_s32_f32(__p0); +__ai uint16x4_t vcltz_s16(int16x4_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 17); return __ret; } #else -__ai int32_t vcvtas_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtas_s32_f32(__p0); +__ai uint16x4_t vcltz_s16(int16x4_t __p0) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __ret; + __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 17); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcvtad_s64_f64(float64_t __p0) { +__ai int64_t vcltzd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtad_s64_f64(__p0); + __ret = (int64_t) __builtin_neon_vcltzd_s64(__p0); return __ret; } #else -__ai int64_t vcvtad_s64_f64(float64_t __p0) { +__ai int64_t vcltzd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtad_s64_f64(__p0); + __ret = (int64_t) __builtin_neon_vcltzd_s64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcvtas_u32_f32(float32_t __p0) { +__ai uint64_t vcltzd_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcltzd_f64(__p0); + return __ret; +} +#else +__ai uint64_t vcltzd_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcltzd_f64(__p0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32_t vcltzs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtas_u32_f32(__p0); + __ret = (uint32_t) __builtin_neon_vcltzs_f32(__p0); return __ret; } #else -__ai uint32_t vcvtas_u32_f32(float32_t __p0) { +__ai uint32_t vcltzs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtas_u32_f32(__p0); + __ret = (uint32_t) __builtin_neon_vcltzs_f32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcvtad_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtad_u64_f64(__p0); +__ai poly64x2_t vcombine_p64(poly64x1_t __p0, poly64x1_t __p1) { + poly64x2_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 1); return __ret; } #else -__ai uint64_t vcvtad_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtad_u64_f64(__p0); +__ai poly64x2_t vcombine_p64(poly64x1_t __p0, poly64x1_t __p1) { + poly64x2_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vcvtms_s32_f32(float32_t __p0) { - 
int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtms_s32_f32(__p0); +__ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { + float64x2_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 1); return __ret; } #else -__ai int32_t vcvtms_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtms_s32_f32(__p0); +__ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { + float64x2_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcvtmd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtmd_s64_f64(__p0); - return __ret; -} +#define vcopyq_lane_p8(__p0_12, __p1_12, __p2_12, __p3_12) __extension__ ({ \ + poly8x16_t __s0_12 = __p0_12; \ + poly8x8_t __s2_12 = __p2_12; \ + poly8x16_t __ret_12; \ + __ret_12 = vsetq_lane_p8(vget_lane_p8(__s2_12, __p3_12), __s0_12, __p1_12); \ + __ret_12; \ +}) +#else +#define vcopyq_lane_p8(__p0_13, __p1_13, __p2_13, __p3_13) __extension__ ({ \ + poly8x16_t __s0_13 = __p0_13; \ + poly8x8_t __s2_13 = __p2_13; \ + poly8x16_t __rev0_13; __rev0_13 = __builtin_shufflevector(__s0_13, __s0_13, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev2_13; __rev2_13 = __builtin_shufflevector(__s2_13, __s2_13, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __ret_13; \ + __ret_13 = __noswap_vsetq_lane_p8(__noswap_vget_lane_p8(__rev2_13, __p3_13), __rev0_13, __p1_13); \ + __ret_13 = __builtin_shufflevector(__ret_13, __ret_13, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_13; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopyq_lane_p16(__p0_14, __p1_14, __p2_14, __p3_14) __extension__ ({ \ + poly16x8_t __s0_14 = __p0_14; \ + poly16x4_t __s2_14 = __p2_14; \ + poly16x8_t __ret_14; \ + __ret_14 = vsetq_lane_p16(vget_lane_p16(__s2_14, __p3_14), __s0_14, __p1_14); \ + __ret_14; \ +}) +#else +#define vcopyq_lane_p16(__p0_15, __p1_15, __p2_15, __p3_15) __extension__ ({ \ + poly16x8_t __s0_15 = __p0_15; \ + poly16x4_t __s2_15 = __p2_15; \ + poly16x8_t __rev0_15; __rev0_15 = __builtin_shufflevector(__s0_15, __s0_15, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __rev2_15; __rev2_15 = __builtin_shufflevector(__s2_15, __s2_15, 3, 2, 1, 0); \ + poly16x8_t __ret_15; \ + __ret_15 = __noswap_vsetq_lane_p16(__noswap_vget_lane_p16(__rev2_15, __p3_15), __rev0_15, __p1_15); \ + __ret_15 = __builtin_shufflevector(__ret_15, __ret_15, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_15; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopyq_lane_u8(__p0_16, __p1_16, __p2_16, __p3_16) __extension__ ({ \ + uint8x16_t __s0_16 = __p0_16; \ + uint8x8_t __s2_16 = __p2_16; \ + uint8x16_t __ret_16; \ + __ret_16 = vsetq_lane_u8(vget_lane_u8(__s2_16, __p3_16), __s0_16, __p1_16); \ + __ret_16; \ +}) #else -__ai int64_t vcvtmd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtmd_s64_f64(__p0); - return __ret; -} +#define vcopyq_lane_u8(__p0_17, __p1_17, __p2_17, __p3_17) __extension__ ({ \ + uint8x16_t __s0_17 = __p0_17; \ + uint8x8_t __s2_17 = __p2_17; \ + uint8x16_t __rev0_17; __rev0_17 = __builtin_shufflevector(__s0_17, __s0_17, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_17; __rev2_17 = __builtin_shufflevector(__s2_17, __s2_17, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_17; \ + __ret_17 = __noswap_vsetq_lane_u8(__noswap_vget_lane_u8(__rev2_17, __p3_17), __rev0_17, __p1_17); \ + __ret_17 = 
__builtin_shufflevector(__ret_17, __ret_17, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_17; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcvtms_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtms_u32_f32(__p0); - return __ret; -} +#define vcopyq_lane_u32(__p0_18, __p1_18, __p2_18, __p3_18) __extension__ ({ \ + uint32x4_t __s0_18 = __p0_18; \ + uint32x2_t __s2_18 = __p2_18; \ + uint32x4_t __ret_18; \ + __ret_18 = vsetq_lane_u32(vget_lane_u32(__s2_18, __p3_18), __s0_18, __p1_18); \ + __ret_18; \ +}) #else -__ai uint32_t vcvtms_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtms_u32_f32(__p0); - return __ret; -} +#define vcopyq_lane_u32(__p0_19, __p1_19, __p2_19, __p3_19) __extension__ ({ \ + uint32x4_t __s0_19 = __p0_19; \ + uint32x2_t __s2_19 = __p2_19; \ + uint32x4_t __rev0_19; __rev0_19 = __builtin_shufflevector(__s0_19, __s0_19, 3, 2, 1, 0); \ + uint32x2_t __rev2_19; __rev2_19 = __builtin_shufflevector(__s2_19, __s2_19, 1, 0); \ + uint32x4_t __ret_19; \ + __ret_19 = __noswap_vsetq_lane_u32(__noswap_vget_lane_u32(__rev2_19, __p3_19), __rev0_19, __p1_19); \ + __ret_19 = __builtin_shufflevector(__ret_19, __ret_19, 3, 2, 1, 0); \ + __ret_19; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcvtmd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtmd_u64_f64(__p0); - return __ret; -} +#define vcopyq_lane_u64(__p0_20, __p1_20, __p2_20, __p3_20) __extension__ ({ \ + uint64x2_t __s0_20 = __p0_20; \ + uint64x1_t __s2_20 = __p2_20; \ + uint64x2_t __ret_20; \ + __ret_20 = vsetq_lane_u64(vget_lane_u64(__s2_20, __p3_20), __s0_20, __p1_20); \ + __ret_20; \ +}) #else -__ai uint64_t vcvtmd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtmd_u64_f64(__p0); - return __ret; -} +#define vcopyq_lane_u64(__p0_21, __p1_21, __p2_21, __p3_21) __extension__ ({ \ + uint64x2_t __s0_21 = __p0_21; \ + uint64x1_t __s2_21 = __p2_21; \ + uint64x2_t __rev0_21; __rev0_21 = __builtin_shufflevector(__s0_21, __s0_21, 1, 0); \ + uint64x2_t __ret_21; \ + __ret_21 = __noswap_vsetq_lane_u64(__noswap_vget_lane_u64(__s2_21, __p3_21), __rev0_21, __p1_21); \ + __ret_21 = __builtin_shufflevector(__ret_21, __ret_21, 1, 0); \ + __ret_21; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vcvtns_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtns_s32_f32(__p0); - return __ret; -} +#define vcopyq_lane_u16(__p0_22, __p1_22, __p2_22, __p3_22) __extension__ ({ \ + uint16x8_t __s0_22 = __p0_22; \ + uint16x4_t __s2_22 = __p2_22; \ + uint16x8_t __ret_22; \ + __ret_22 = vsetq_lane_u16(vget_lane_u16(__s2_22, __p3_22), __s0_22, __p1_22); \ + __ret_22; \ +}) #else -__ai int32_t vcvtns_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtns_s32_f32(__p0); - return __ret; -} +#define vcopyq_lane_u16(__p0_23, __p1_23, __p2_23, __p3_23) __extension__ ({ \ + uint16x8_t __s0_23 = __p0_23; \ + uint16x4_t __s2_23 = __p2_23; \ + uint16x8_t __rev0_23; __rev0_23 = __builtin_shufflevector(__s0_23, __s0_23, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev2_23; __rev2_23 = __builtin_shufflevector(__s2_23, __s2_23, 3, 2, 1, 0); \ + uint16x8_t __ret_23; \ + __ret_23 = __noswap_vsetq_lane_u16(__noswap_vget_lane_u16(__rev2_23, __p3_23), __rev0_23, __p1_23); \ + __ret_23 = __builtin_shufflevector(__ret_23, __ret_23, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_23; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcvtnd_s64_f64(float64_t __p0) { - 
int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtnd_s64_f64(__p0); - return __ret; -} +#define vcopyq_lane_s8(__p0_24, __p1_24, __p2_24, __p3_24) __extension__ ({ \ + int8x16_t __s0_24 = __p0_24; \ + int8x8_t __s2_24 = __p2_24; \ + int8x16_t __ret_24; \ + __ret_24 = vsetq_lane_s8(vget_lane_s8(__s2_24, __p3_24), __s0_24, __p1_24); \ + __ret_24; \ +}) #else -__ai int64_t vcvtnd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtnd_s64_f64(__p0); - return __ret; -} +#define vcopyq_lane_s8(__p0_25, __p1_25, __p2_25, __p3_25) __extension__ ({ \ + int8x16_t __s0_25 = __p0_25; \ + int8x8_t __s2_25 = __p2_25; \ + int8x16_t __rev0_25; __rev0_25 = __builtin_shufflevector(__s0_25, __s0_25, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_25; __rev2_25 = __builtin_shufflevector(__s2_25, __s2_25, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_25; \ + __ret_25 = __noswap_vsetq_lane_s8(__noswap_vget_lane_s8(__rev2_25, __p3_25), __rev0_25, __p1_25); \ + __ret_25 = __builtin_shufflevector(__ret_25, __ret_25, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_25; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcvtns_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtns_u32_f32(__p0); - return __ret; -} +#define vcopyq_lane_f32(__p0_26, __p1_26, __p2_26, __p3_26) __extension__ ({ \ + float32x4_t __s0_26 = __p0_26; \ + float32x2_t __s2_26 = __p2_26; \ + float32x4_t __ret_26; \ + __ret_26 = vsetq_lane_f32(vget_lane_f32(__s2_26, __p3_26), __s0_26, __p1_26); \ + __ret_26; \ +}) #else -__ai uint32_t vcvtns_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtns_u32_f32(__p0); - return __ret; -} +#define vcopyq_lane_f32(__p0_27, __p1_27, __p2_27, __p3_27) __extension__ ({ \ + float32x4_t __s0_27 = __p0_27; \ + float32x2_t __s2_27 = __p2_27; \ + float32x4_t __rev0_27; __rev0_27 = __builtin_shufflevector(__s0_27, __s0_27, 3, 2, 1, 0); \ + float32x2_t __rev2_27; __rev2_27 = __builtin_shufflevector(__s2_27, __s2_27, 1, 0); \ + float32x4_t __ret_27; \ + __ret_27 = __noswap_vsetq_lane_f32(__noswap_vget_lane_f32(__rev2_27, __p3_27), __rev0_27, __p1_27); \ + __ret_27 = __builtin_shufflevector(__ret_27, __ret_27, 3, 2, 1, 0); \ + __ret_27; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcvtnd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtnd_u64_f64(__p0); - return __ret; -} +#define vcopyq_lane_s32(__p0_28, __p1_28, __p2_28, __p3_28) __extension__ ({ \ + int32x4_t __s0_28 = __p0_28; \ + int32x2_t __s2_28 = __p2_28; \ + int32x4_t __ret_28; \ + __ret_28 = vsetq_lane_s32(vget_lane_s32(__s2_28, __p3_28), __s0_28, __p1_28); \ + __ret_28; \ +}) #else -__ai uint64_t vcvtnd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtnd_u64_f64(__p0); - return __ret; -} +#define vcopyq_lane_s32(__p0_29, __p1_29, __p2_29, __p3_29) __extension__ ({ \ + int32x4_t __s0_29 = __p0_29; \ + int32x2_t __s2_29 = __p2_29; \ + int32x4_t __rev0_29; __rev0_29 = __builtin_shufflevector(__s0_29, __s0_29, 3, 2, 1, 0); \ + int32x2_t __rev2_29; __rev2_29 = __builtin_shufflevector(__s2_29, __s2_29, 1, 0); \ + int32x4_t __ret_29; \ + __ret_29 = __noswap_vsetq_lane_s32(__noswap_vget_lane_s32(__rev2_29, __p3_29), __rev0_29, __p1_29); \ + __ret_29 = __builtin_shufflevector(__ret_29, __ret_29, 3, 2, 1, 0); \ + __ret_29; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vcvtps_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) 
__builtin_neon_vcvtps_s32_f32(__p0); - return __ret; -} +#define vcopyq_lane_s64(__p0_30, __p1_30, __p2_30, __p3_30) __extension__ ({ \ + int64x2_t __s0_30 = __p0_30; \ + int64x1_t __s2_30 = __p2_30; \ + int64x2_t __ret_30; \ + __ret_30 = vsetq_lane_s64(vget_lane_s64(__s2_30, __p3_30), __s0_30, __p1_30); \ + __ret_30; \ +}) #else -__ai int32_t vcvtps_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtps_s32_f32(__p0); - return __ret; -} +#define vcopyq_lane_s64(__p0_31, __p1_31, __p2_31, __p3_31) __extension__ ({ \ + int64x2_t __s0_31 = __p0_31; \ + int64x1_t __s2_31 = __p2_31; \ + int64x2_t __rev0_31; __rev0_31 = __builtin_shufflevector(__s0_31, __s0_31, 1, 0); \ + int64x2_t __ret_31; \ + __ret_31 = __noswap_vsetq_lane_s64(__noswap_vget_lane_s64(__s2_31, __p3_31), __rev0_31, __p1_31); \ + __ret_31 = __builtin_shufflevector(__ret_31, __ret_31, 1, 0); \ + __ret_31; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vcvtpd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtpd_s64_f64(__p0); - return __ret; -} +#define vcopyq_lane_s16(__p0_32, __p1_32, __p2_32, __p3_32) __extension__ ({ \ + int16x8_t __s0_32 = __p0_32; \ + int16x4_t __s2_32 = __p2_32; \ + int16x8_t __ret_32; \ + __ret_32 = vsetq_lane_s16(vget_lane_s16(__s2_32, __p3_32), __s0_32, __p1_32); \ + __ret_32; \ +}) #else -__ai int64_t vcvtpd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtpd_s64_f64(__p0); - return __ret; -} +#define vcopyq_lane_s16(__p0_33, __p1_33, __p2_33, __p3_33) __extension__ ({ \ + int16x8_t __s0_33 = __p0_33; \ + int16x4_t __s2_33 = __p2_33; \ + int16x8_t __rev0_33; __rev0_33 = __builtin_shufflevector(__s0_33, __s0_33, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2_33; __rev2_33 = __builtin_shufflevector(__s2_33, __s2_33, 3, 2, 1, 0); \ + int16x8_t __ret_33; \ + __ret_33 = __noswap_vsetq_lane_s16(__noswap_vget_lane_s16(__rev2_33, __p3_33), __rev0_33, __p1_33); \ + __ret_33 = __builtin_shufflevector(__ret_33, __ret_33, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_33; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vcvtps_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtps_u32_f32(__p0); - return __ret; -} +#define vcopy_lane_p8(__p0_34, __p1_34, __p2_34, __p3_34) __extension__ ({ \ + poly8x8_t __s0_34 = __p0_34; \ + poly8x8_t __s2_34 = __p2_34; \ + poly8x8_t __ret_34; \ + __ret_34 = vset_lane_p8(vget_lane_p8(__s2_34, __p3_34), __s0_34, __p1_34); \ + __ret_34; \ +}) #else -__ai uint32_t vcvtps_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtps_u32_f32(__p0); - return __ret; -} +#define vcopy_lane_p8(__p0_35, __p1_35, __p2_35, __p3_35) __extension__ ({ \ + poly8x8_t __s0_35 = __p0_35; \ + poly8x8_t __s2_35 = __p2_35; \ + poly8x8_t __rev0_35; __rev0_35 = __builtin_shufflevector(__s0_35, __s0_35, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev2_35; __rev2_35 = __builtin_shufflevector(__s2_35, __s2_35, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret_35; \ + __ret_35 = __noswap_vset_lane_p8(__noswap_vget_lane_p8(__rev2_35, __p3_35), __rev0_35, __p1_35); \ + __ret_35 = __builtin_shufflevector(__ret_35, __ret_35, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_35; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vcvtpd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtpd_u64_f64(__p0); - return __ret; -} +#define vcopy_lane_p16(__p0_36, __p1_36, __p2_36, __p3_36) __extension__ ({ \ + poly16x4_t __s0_36 = __p0_36; \ + poly16x4_t __s2_36 = 
__p2_36; \ + poly16x4_t __ret_36; \ + __ret_36 = vset_lane_p16(vget_lane_p16(__s2_36, __p3_36), __s0_36, __p1_36); \ + __ret_36; \ +}) #else -__ai uint64_t vcvtpd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtpd_u64_f64(__p0); - return __ret; -} +#define vcopy_lane_p16(__p0_37, __p1_37, __p2_37, __p3_37) __extension__ ({ \ + poly16x4_t __s0_37 = __p0_37; \ + poly16x4_t __s2_37 = __p2_37; \ + poly16x4_t __rev0_37; __rev0_37 = __builtin_shufflevector(__s0_37, __s0_37, 3, 2, 1, 0); \ + poly16x4_t __rev2_37; __rev2_37 = __builtin_shufflevector(__s2_37, __s2_37, 3, 2, 1, 0); \ + poly16x4_t __ret_37; \ + __ret_37 = __noswap_vset_lane_p16(__noswap_vget_lane_p16(__rev2_37, __p3_37), __rev0_37, __p1_37); \ + __ret_37 = __builtin_shufflevector(__ret_37, __ret_37, 3, 2, 1, 0); \ + __ret_37; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vcvtxd_f32_f64(float64_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvtxd_f32_f64(__p0); - return __ret; -} +#define vcopy_lane_u8(__p0_38, __p1_38, __p2_38, __p3_38) __extension__ ({ \ + uint8x8_t __s0_38 = __p0_38; \ + uint8x8_t __s2_38 = __p2_38; \ + uint8x8_t __ret_38; \ + __ret_38 = vset_lane_u8(vget_lane_u8(__s2_38, __p3_38), __s0_38, __p1_38); \ + __ret_38; \ +}) #else -__ai float32_t vcvtxd_f32_f64(float64_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvtxd_f32_f64(__p0); - return __ret; -} +#define vcopy_lane_u8(__p0_39, __p1_39, __p2_39, __p3_39) __extension__ ({ \ + uint8x8_t __s0_39 = __p0_39; \ + uint8x8_t __s2_39 = __p2_39; \ + uint8x8_t __rev0_39; __rev0_39 = __builtin_shufflevector(__s0_39, __s0_39, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_39; __rev2_39 = __builtin_shufflevector(__s2_39, __s2_39, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret_39; \ + __ret_39 = __noswap_vset_lane_u8(__noswap_vget_lane_u8(__rev2_39, __p3_39), __rev0_39, __p1_39); \ + __ret_39 = __builtin_shufflevector(__ret_39, __ret_39, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_39; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vcvtx_f32_f64(float64x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__p0, 42); - return __ret; -} +#define vcopy_lane_u32(__p0_40, __p1_40, __p2_40, __p3_40) __extension__ ({ \ + uint32x2_t __s0_40 = __p0_40; \ + uint32x2_t __s2_40 = __p2_40; \ + uint32x2_t __ret_40; \ + __ret_40 = vset_lane_u32(vget_lane_u32(__s2_40, __p3_40), __s0_40, __p1_40); \ + __ret_40; \ +}) #else -__ai float32x2_t vcvtx_f32_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -__ai float32x2_t __noswap_vcvtx_f32_f64(float64x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__p0, 42); - return __ret; -} +#define vcopy_lane_u32(__p0_41, __p1_41, __p2_41, __p3_41) __extension__ ({ \ + uint32x2_t __s0_41 = __p0_41; \ + uint32x2_t __s2_41 = __p2_41; \ + uint32x2_t __rev0_41; __rev0_41 = __builtin_shufflevector(__s0_41, __s0_41, 1, 0); \ + uint32x2_t __rev2_41; __rev2_41 = __builtin_shufflevector(__s2_41, __s2_41, 1, 0); \ + uint32x2_t __ret_41; \ + __ret_41 = __noswap_vset_lane_u32(__noswap_vget_lane_u32(__rev2_41, __p3_41), __rev0_41, __p1_41); \ + __ret_41 = __builtin_shufflevector(__ret_41, __ret_41, 1, 0); \ + __ret_41; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t 
vcvtx_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { - float32x4_t __ret; - __ret = vcombine_f32(__p0, vcvtx_f32_f64(__p1)); - return __ret; -} +#define vcopy_lane_u64(__p0_42, __p1_42, __p2_42, __p3_42) __extension__ ({ \ + uint64x1_t __s0_42 = __p0_42; \ + uint64x1_t __s2_42 = __p2_42; \ + uint64x1_t __ret_42; \ + __ret_42 = vset_lane_u64(vget_lane_u64(__s2_42, __p3_42), __s0_42, __p1_42); \ + __ret_42; \ +}) #else -__ai float32x4_t vcvtx_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x4_t __ret; - __ret = __noswap_vcombine_f32(__rev0, __noswap_vcvtx_f32_f64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vcopy_lane_u64(__p0_43, __p1_43, __p2_43, __p3_43) __extension__ ({ \ + uint64x1_t __s0_43 = __p0_43; \ + uint64x1_t __s2_43 = __p2_43; \ + uint64x1_t __ret_43; \ + __ret_43 = __noswap_vset_lane_u64(__noswap_vget_lane_u64(__s2_43, __p3_43), __s0_43, __p1_43); \ + __ret_43; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vdivq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = __p0 / __p1; - return __ret; -} +#define vcopy_lane_u16(__p0_44, __p1_44, __p2_44, __p3_44) __extension__ ({ \ + uint16x4_t __s0_44 = __p0_44; \ + uint16x4_t __s2_44 = __p2_44; \ + uint16x4_t __ret_44; \ + __ret_44 = vset_lane_u16(vget_lane_u16(__s2_44, __p3_44), __s0_44, __p1_44); \ + __ret_44; \ +}) #else -__ai float64x2_t vdivq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __rev0 / __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vcopy_lane_u16(__p0_45, __p1_45, __p2_45, __p3_45) __extension__ ({ \ + uint16x4_t __s0_45 = __p0_45; \ + uint16x4_t __s2_45 = __p2_45; \ + uint16x4_t __rev0_45; __rev0_45 = __builtin_shufflevector(__s0_45, __s0_45, 3, 2, 1, 0); \ + uint16x4_t __rev2_45; __rev2_45 = __builtin_shufflevector(__s2_45, __s2_45, 3, 2, 1, 0); \ + uint16x4_t __ret_45; \ + __ret_45 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_45, __p3_45), __rev0_45, __p1_45); \ + __ret_45 = __builtin_shufflevector(__ret_45, __ret_45, 3, 2, 1, 0); \ + __ret_45; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vdivq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __ret; - __ret = __p0 / __p1; - return __ret; -} +#define vcopy_lane_s8(__p0_46, __p1_46, __p2_46, __p3_46) __extension__ ({ \ + int8x8_t __s0_46 = __p0_46; \ + int8x8_t __s2_46 = __p2_46; \ + int8x8_t __ret_46; \ + __ret_46 = vset_lane_s8(vget_lane_s8(__s2_46, __p3_46), __s0_46, __p1_46); \ + __ret_46; \ +}) #else -__ai float32x4_t vdivq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __ret; - __ret = __rev0 / __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vcopy_lane_s8(__p0_47, __p1_47, __p2_47, __p3_47) __extension__ ({ \ + int8x8_t __s0_47 = __p0_47; \ + int8x8_t __s2_47 = __p2_47; \ + int8x8_t __rev0_47; __rev0_47 = __builtin_shufflevector(__s0_47, __s0_47, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_47; __rev2_47 = __builtin_shufflevector(__s2_47, __s2_47, 7, 6, 5, 4, 3, 
2, 1, 0); \ + int8x8_t __ret_47; \ + __ret_47 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_47, __p3_47), __rev0_47, __p1_47); \ + __ret_47 = __builtin_shufflevector(__ret_47, __ret_47, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_47; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vdiv_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 / __p1; - return __ret; -} +#define vcopy_lane_f32(__p0_48, __p1_48, __p2_48, __p3_48) __extension__ ({ \ + float32x2_t __s0_48 = __p0_48; \ + float32x2_t __s2_48 = __p2_48; \ + float32x2_t __ret_48; \ + __ret_48 = vset_lane_f32(vget_lane_f32(__s2_48, __p3_48), __s0_48, __p1_48); \ + __ret_48; \ +}) #else -__ai float64x1_t vdiv_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 / __p1; - return __ret; -} +#define vcopy_lane_f32(__p0_49, __p1_49, __p2_49, __p3_49) __extension__ ({ \ + float32x2_t __s0_49 = __p0_49; \ + float32x2_t __s2_49 = __p2_49; \ + float32x2_t __rev0_49; __rev0_49 = __builtin_shufflevector(__s0_49, __s0_49, 1, 0); \ + float32x2_t __rev2_49; __rev2_49 = __builtin_shufflevector(__s2_49, __s2_49, 1, 0); \ + float32x2_t __ret_49; \ + __ret_49 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_49, __p3_49), __rev0_49, __p1_49); \ + __ret_49 = __builtin_shufflevector(__ret_49, __ret_49, 1, 0); \ + __ret_49; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __ret; - __ret = __p0 / __p1; - return __ret; -} +#define vcopy_lane_s32(__p0_50, __p1_50, __p2_50, __p3_50) __extension__ ({ \ + int32x2_t __s0_50 = __p0_50; \ + int32x2_t __s2_50 = __p2_50; \ + int32x2_t __ret_50; \ + __ret_50 = vset_lane_s32(vget_lane_s32(__s2_50, __p3_50), __s0_50, __p1_50); \ + __ret_50; \ +}) #else -__ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __ret; - __ret = __rev0 / __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vcopy_lane_s32(__p0_51, __p1_51, __p2_51, __p3_51) __extension__ ({ \ + int32x2_t __s0_51 = __p0_51; \ + int32x2_t __s2_51 = __p2_51; \ + int32x2_t __rev0_51; __rev0_51 = __builtin_shufflevector(__s0_51, __s0_51, 1, 0); \ + int32x2_t __rev2_51; __rev2_51 = __builtin_shufflevector(__s2_51, __s2_51, 1, 0); \ + int32x2_t __ret_51; \ + __ret_51 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_51, __p3_51), __rev0_51, __p1_51); \ + __ret_51 = __builtin_shufflevector(__ret_51, __ret_51, 1, 0); \ + __ret_51; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ - poly8x8_t __s0 = __p0; \ - poly8_t __ret; \ - __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopy_lane_s64(__p0_52, __p1_52, __p2_52, __p3_52) __extension__ ({ \ + int64x1_t __s0_52 = __p0_52; \ + int64x1_t __s2_52 = __p2_52; \ + int64x1_t __ret_52; \ + __ret_52 = vset_lane_s64(vget_lane_s64(__s2_52, __p3_52), __s0_52, __p1_52); \ + __ret_52; \ }) #else -#define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ - poly8x8_t __s0 = __p0; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8_t __ret; \ - __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopy_lane_s64(__p0_53, __p1_53, __p2_53, __p3_53) __extension__ ({ \ + int64x1_t __s0_53 = __p0_53; \ + int64x1_t __s2_53 = __p2_53; \ 
+ int64x1_t __ret_53; \ + __ret_53 = __noswap_vset_lane_s64(__noswap_vget_lane_s64(__s2_53, __p3_53), __s0_53, __p1_53); \ + __ret_53; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vduph_lane_p16(__p0, __p1) __extension__ ({ \ - poly16x4_t __s0 = __p0; \ - poly16_t __ret; \ - __ret = (poly16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopy_lane_s16(__p0_54, __p1_54, __p2_54, __p3_54) __extension__ ({ \ + int16x4_t __s0_54 = __p0_54; \ + int16x4_t __s2_54 = __p2_54; \ + int16x4_t __ret_54; \ + __ret_54 = vset_lane_s16(vget_lane_s16(__s2_54, __p3_54), __s0_54, __p1_54); \ + __ret_54; \ }) #else -#define vduph_lane_p16(__p0, __p1) __extension__ ({ \ - poly16x4_t __s0 = __p0; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - poly16_t __ret; \ - __ret = (poly16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopy_lane_s16(__p0_55, __p1_55, __p2_55, __p3_55) __extension__ ({ \ + int16x4_t __s0_55 = __p0_55; \ + int16x4_t __s2_55 = __p2_55; \ + int16x4_t __rev0_55; __rev0_55 = __builtin_shufflevector(__s0_55, __s0_55, 3, 2, 1, 0); \ + int16x4_t __rev2_55; __rev2_55 = __builtin_shufflevector(__s2_55, __s2_55, 3, 2, 1, 0); \ + int16x4_t __ret_55; \ + __ret_55 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_55, __p3_55), __rev0_55, __p1_55); \ + __ret_55 = __builtin_shufflevector(__ret_55, __ret_55, 3, 2, 1, 0); \ + __ret_55; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupb_lane_u8(__p0, __p1) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_p8(__p0_56, __p1_56, __p2_56, __p3_56) __extension__ ({ \ + poly8x16_t __s0_56 = __p0_56; \ + poly8x16_t __s2_56 = __p2_56; \ + poly8x16_t __ret_56; \ + __ret_56 = vsetq_lane_p8(vgetq_lane_p8(__s2_56, __p3_56), __s0_56, __p1_56); \ + __ret_56; \ }) #else -#define vdupb_lane_u8(__p0, __p1) __extension__ ({ \ - uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopyq_laneq_p8(__p0_57, __p1_57, __p2_57, __p3_57) __extension__ ({ \ + poly8x16_t __s0_57 = __p0_57; \ + poly8x16_t __s2_57 = __p2_57; \ + poly8x16_t __rev0_57; __rev0_57 = __builtin_shufflevector(__s0_57, __s0_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev2_57; __rev2_57 = __builtin_shufflevector(__s2_57, __s2_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __ret_57; \ + __ret_57 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_57, __p3_57), __rev0_57, __p1_57); \ + __ret_57 = __builtin_shufflevector(__ret_57, __ret_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_57; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdups_lane_u32(__p0, __p1) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_p16(__p0_58, __p1_58, __p2_58, __p3_58) __extension__ ({ \ + poly16x8_t __s0_58 = __p0_58; \ + poly16x8_t __s2_58 = __p2_58; \ + poly16x8_t __ret_58; \ + __ret_58 = vsetq_lane_p16(vgetq_lane_p16(__s2_58, __p3_58), __s0_58, __p1_58); \ + __ret_58; \ }) #else -#define vdups_lane_u32(__p0, __p1) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 
1, 0); \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopyq_laneq_p16(__p0_59, __p1_59, __p2_59, __p3_59) __extension__ ({ \ + poly16x8_t __s0_59 = __p0_59; \ + poly16x8_t __s2_59 = __p2_59; \ + poly16x8_t __rev0_59; __rev0_59 = __builtin_shufflevector(__s0_59, __s0_59, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev2_59; __rev2_59 = __builtin_shufflevector(__s2_59, __s2_59, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __ret_59; \ + __ret_59 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_59, __p3_59), __rev0_59, __p1_59); \ + __ret_59 = __builtin_shufflevector(__ret_59, __ret_59, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_59; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupd_lane_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_u8(__p0_60, __p1_60, __p2_60, __p3_60) __extension__ ({ \ + uint8x16_t __s0_60 = __p0_60; \ + uint8x16_t __s2_60 = __p2_60; \ + uint8x16_t __ret_60; \ + __ret_60 = vsetq_lane_u8(vgetq_lane_u8(__s2_60, __p3_60), __s0_60, __p1_60); \ + __ret_60; \ }) #else -#define vdupd_lane_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_u8(__p0_61, __p1_61, __p2_61, __p3_61) __extension__ ({ \ + uint8x16_t __s0_61 = __p0_61; \ + uint8x16_t __s2_61 = __p2_61; \ + uint8x16_t __rev0_61; __rev0_61 = __builtin_shufflevector(__s0_61, __s0_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_61; __rev2_61 = __builtin_shufflevector(__s2_61, __s2_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_61; \ + __ret_61 = __noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_61, __p3_61), __rev0_61, __p1_61); \ + __ret_61 = __builtin_shufflevector(__ret_61, __ret_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_61; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vduph_lane_u16(__p0, __p1) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_u32(__p0_62, __p1_62, __p2_62, __p3_62) __extension__ ({ \ + uint32x4_t __s0_62 = __p0_62; \ + uint32x4_t __s2_62 = __p2_62; \ + uint32x4_t __ret_62; \ + __ret_62 = vsetq_lane_u32(vgetq_lane_u32(__s2_62, __p3_62), __s0_62, __p1_62); \ + __ret_62; \ }) #else -#define vduph_lane_u16(__p0, __p1) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopyq_laneq_u32(__p0_63, __p1_63, __p2_63, __p3_63) __extension__ ({ \ + uint32x4_t __s0_63 = __p0_63; \ + uint32x4_t __s2_63 = __p2_63; \ + uint32x4_t __rev0_63; __rev0_63 = __builtin_shufflevector(__s0_63, __s0_63, 3, 2, 1, 0); \ + uint32x4_t __rev2_63; __rev2_63 = __builtin_shufflevector(__s2_63, __s2_63, 3, 2, 1, 0); \ + uint32x4_t __ret_63; \ + __ret_63 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_63, __p3_63), __rev0_63, __p1_63); \ + __ret_63 = __builtin_shufflevector(__ret_63, __ret_63, 3, 2, 1, 0); \ + __ret_63; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupb_lane_s8(__p0, __p1) __extension__ ({ \ - int8x8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) 
__builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_u64(__p0_64, __p1_64, __p2_64, __p3_64) __extension__ ({ \ + uint64x2_t __s0_64 = __p0_64; \ + uint64x2_t __s2_64 = __p2_64; \ + uint64x2_t __ret_64; \ + __ret_64 = vsetq_lane_u64(vgetq_lane_u64(__s2_64, __p3_64), __s0_64, __p1_64); \ + __ret_64; \ }) #else -#define vdupb_lane_s8(__p0, __p1) __extension__ ({ \ - int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopyq_laneq_u64(__p0_65, __p1_65, __p2_65, __p3_65) __extension__ ({ \ + uint64x2_t __s0_65 = __p0_65; \ + uint64x2_t __s2_65 = __p2_65; \ + uint64x2_t __rev0_65; __rev0_65 = __builtin_shufflevector(__s0_65, __s0_65, 1, 0); \ + uint64x2_t __rev2_65; __rev2_65 = __builtin_shufflevector(__s2_65, __s2_65, 1, 0); \ + uint64x2_t __ret_65; \ + __ret_65 = __noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_65, __p3_65), __rev0_65, __p1_65); \ + __ret_65 = __builtin_shufflevector(__ret_65, __ret_65, 1, 0); \ + __ret_65; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupd_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vdupd_lane_f64((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_u16(__p0_66, __p1_66, __p2_66, __p3_66) __extension__ ({ \ + uint16x8_t __s0_66 = __p0_66; \ + uint16x8_t __s2_66 = __p2_66; \ + uint16x8_t __ret_66; \ + __ret_66 = vsetq_lane_u16(vgetq_lane_u16(__s2_66, __p3_66), __s0_66, __p1_66); \ + __ret_66; \ }) #else -#define vdupd_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vdupd_lane_f64((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_u16(__p0_67, __p1_67, __p2_67, __p3_67) __extension__ ({ \ + uint16x8_t __s0_67 = __p0_67; \ + uint16x8_t __s2_67 = __p2_67; \ + uint16x8_t __rev0_67; __rev0_67 = __builtin_shufflevector(__s0_67, __s0_67, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2_67; __rev2_67 = __builtin_shufflevector(__s2_67, __s2_67, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_67; \ + __ret_67 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_67, __p3_67), __rev0_67, __p1_67); \ + __ret_67 = __builtin_shufflevector(__ret_67, __ret_67, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_67; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdups_lane_f32(__p0, __p1) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vdups_lane_f32((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_s8(__p0_68, __p1_68, __p2_68, __p3_68) __extension__ ({ \ + int8x16_t __s0_68 = __p0_68; \ + int8x16_t __s2_68 = __p2_68; \ + int8x16_t __ret_68; \ + __ret_68 = vsetq_lane_s8(vgetq_lane_s8(__s2_68, __p3_68), __s0_68, __p1_68); \ + __ret_68; \ }) #else -#define vdups_lane_f32(__p0, __p1) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vdups_lane_f32((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopyq_laneq_s8(__p0_69, __p1_69, __p2_69, __p3_69) __extension__ ({ \ + int8x16_t __s0_69 = __p0_69; \ + int8x16_t __s2_69 = __p2_69; \ + int8x16_t __rev0_69; __rev0_69 = __builtin_shufflevector(__s0_69, __s0_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_69; __rev2_69 = 
__builtin_shufflevector(__s2_69, __s2_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_69; \ + __ret_69 = __noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_69, __p3_69), __rev0_69, __p1_69); \ + __ret_69 = __builtin_shufflevector(__ret_69, __ret_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_69; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdups_lane_s32(__p0, __p1) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_f32(__p0_70, __p1_70, __p2_70, __p3_70) __extension__ ({ \ + float32x4_t __s0_70 = __p0_70; \ + float32x4_t __s2_70 = __p2_70; \ + float32x4_t __ret_70; \ + __ret_70 = vsetq_lane_f32(vgetq_lane_f32(__s2_70, __p3_70), __s0_70, __p1_70); \ + __ret_70; \ }) #else -#define vdups_lane_s32(__p0, __p1) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopyq_laneq_f32(__p0_71, __p1_71, __p2_71, __p3_71) __extension__ ({ \ + float32x4_t __s0_71 = __p0_71; \ + float32x4_t __s2_71 = __p2_71; \ + float32x4_t __rev0_71; __rev0_71 = __builtin_shufflevector(__s0_71, __s0_71, 3, 2, 1, 0); \ + float32x4_t __rev2_71; __rev2_71 = __builtin_shufflevector(__s2_71, __s2_71, 3, 2, 1, 0); \ + float32x4_t __ret_71; \ + __ret_71 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_71, __p3_71), __rev0_71, __p1_71); \ + __ret_71 = __builtin_shufflevector(__ret_71, __ret_71, 3, 2, 1, 0); \ + __ret_71; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupd_lane_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_s32(__p0_72, __p1_72, __p2_72, __p3_72) __extension__ ({ \ + int32x4_t __s0_72 = __p0_72; \ + int32x4_t __s2_72 = __p2_72; \ + int32x4_t __ret_72; \ + __ret_72 = vsetq_lane_s32(vgetq_lane_s32(__s2_72, __p3_72), __s0_72, __p1_72); \ + __ret_72; \ }) #else -#define vdupd_lane_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_s32(__p0_73, __p1_73, __p2_73, __p3_73) __extension__ ({ \ + int32x4_t __s0_73 = __p0_73; \ + int32x4_t __s2_73 = __p2_73; \ + int32x4_t __rev0_73; __rev0_73 = __builtin_shufflevector(__s0_73, __s0_73, 3, 2, 1, 0); \ + int32x4_t __rev2_73; __rev2_73 = __builtin_shufflevector(__s2_73, __s2_73, 3, 2, 1, 0); \ + int32x4_t __ret_73; \ + __ret_73 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_73, __p3_73), __rev0_73, __p1_73); \ + __ret_73 = __builtin_shufflevector(__ret_73, __ret_73, 3, 2, 1, 0); \ + __ret_73; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vduph_lane_s16(__p0, __p1) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ - __ret; \ +#define vcopyq_laneq_s64(__p0_74, __p1_74, __p2_74, __p3_74) __extension__ ({ \ + int64x2_t __s0_74 = __p0_74; \ + int64x2_t __s2_74 = __p2_74; \ + int64x2_t __ret_74; \ + __ret_74 = vsetq_lane_s64(vgetq_lane_s64(__s2_74, __p3_74), __s0_74, __p1_74); \ + __ret_74; \ }) #else -#define vduph_lane_s16(__p0, __p1) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, 
__s0, 3, 2, 1, 0); \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ - __ret; \ +#define vcopyq_laneq_s64(__p0_75, __p1_75, __p2_75, __p3_75) __extension__ ({ \ + int64x2_t __s0_75 = __p0_75; \ + int64x2_t __s2_75 = __p2_75; \ + int64x2_t __rev0_75; __rev0_75 = __builtin_shufflevector(__s0_75, __s0_75, 1, 0); \ + int64x2_t __rev2_75; __rev2_75 = __builtin_shufflevector(__s2_75, __s2_75, 1, 0); \ + int64x2_t __ret_75; \ + __ret_75 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_75, __p3_75), __rev0_75, __p1_75); \ + __ret_75 = __builtin_shufflevector(__ret_75, __ret_75, 1, 0); \ + __ret_75; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ - __ret; \ +#define vcopyq_laneq_s16(__p0_76, __p1_76, __p2_76, __p3_76) __extension__ ({ \ + int16x8_t __s0_76 = __p0_76; \ + int16x8_t __s2_76 = __p2_76; \ + int16x8_t __ret_76; \ + __ret_76 = vsetq_lane_s16(vgetq_lane_s16(__s2_76, __p3_76), __s0_76, __p1_76); \ + __ret_76; \ }) #else -#define vdup_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ - __ret; \ +#define vcopyq_laneq_s16(__p0_77, __p1_77, __p2_77, __p3_77) __extension__ ({ \ + int16x8_t __s0_77 = __p0_77; \ + int16x8_t __s2_77 = __p2_77; \ + int16x8_t __rev0_77; __rev0_77 = __builtin_shufflevector(__s0_77, __s0_77, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_77; __rev2_77 = __builtin_shufflevector(__s2_77, __s2_77, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_77; \ + __ret_77 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_77, __p3_77), __rev0_77, __p1_77); \ + __ret_77 = __builtin_shufflevector(__ret_77, __ret_77, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_77; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ - __ret; \ +#define vcopy_laneq_p8(__p0_78, __p1_78, __p2_78, __p3_78) __extension__ ({ \ + poly8x8_t __s0_78 = __p0_78; \ + poly8x16_t __s2_78 = __p2_78; \ + poly8x8_t __ret_78; \ + __ret_78 = vset_lane_p8(vgetq_lane_p8(__s2_78, __p3_78), __s0_78, __p1_78); \ + __ret_78; \ }) #else -#define vdupq_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ +#define vcopy_laneq_p8(__p0_79, __p1_79, __p2_79, __p3_79) __extension__ ({ \ + poly8x8_t __s0_79 = __p0_79; \ + poly8x16_t __s2_79 = __p2_79; \ + poly8x8_t __rev0_79; __rev0_79 = __builtin_shufflevector(__s0_79, __s0_79, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev2_79; __rev2_79 = __builtin_shufflevector(__s2_79, __s2_79, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret_79; \ + __ret_79 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_79, __p3_79), __rev0_79, __p1_79); \ + __ret_79 = __builtin_shufflevector(__ret_79, __ret_79, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_79; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ - __ret; \ +#define vcopy_laneq_p16(__p0_80, __p1_80, __p2_80, __p3_80) __extension__ ({ \ + poly16x4_t __s0_80 = __p0_80; \ + 
poly16x8_t __s2_80 = __p2_80; \ + poly16x4_t __ret_80; \ + __ret_80 = vset_lane_p16(vgetq_lane_p16(__s2_80, __p3_80), __s0_80, __p1_80); \ + __ret_80; \ }) #else -#define vdupq_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ +#define vcopy_laneq_p16(__p0_81, __p1_81, __p2_81, __p3_81) __extension__ ({ \ + poly16x4_t __s0_81 = __p0_81; \ + poly16x8_t __s2_81 = __p2_81; \ + poly16x4_t __rev0_81; __rev0_81 = __builtin_shufflevector(__s0_81, __s0_81, 3, 2, 1, 0); \ + poly16x8_t __rev2_81; __rev2_81 = __builtin_shufflevector(__s2_81, __s2_81, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __ret_81; \ + __ret_81 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_81, __p3_81), __rev0_81, __p1_81); \ + __ret_81 = __builtin_shufflevector(__ret_81, __ret_81, 3, 2, 1, 0); \ + __ret_81; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_lane_f16(__p0, __p1) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x8_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret; \ +#define vcopy_laneq_u8(__p0_82, __p1_82, __p2_82, __p3_82) __extension__ ({ \ + uint8x8_t __s0_82 = __p0_82; \ + uint8x16_t __s2_82 = __p2_82; \ + uint8x8_t __ret_82; \ + __ret_82 = vset_lane_u8(vgetq_lane_u8(__s2_82, __p3_82), __s0_82, __p1_82); \ + __ret_82; \ }) #else -#define vdupq_lane_f16(__p0, __p1) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x8_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vcopy_laneq_u8(__p0_83, __p1_83, __p2_83, __p3_83) __extension__ ({ \ + uint8x8_t __s0_83 = __p0_83; \ + uint8x16_t __s2_83 = __p2_83; \ + uint8x8_t __rev0_83; __rev0_83 = __builtin_shufflevector(__s0_83, __s0_83, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_83; __rev2_83 = __builtin_shufflevector(__s2_83, __s2_83, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret_83; \ + __ret_83 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_83, __p3_83), __rev0_83, __p1_83); \ + __ret_83 = __builtin_shufflevector(__ret_83, __ret_83, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_83; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ - __ret; \ +#define vcopy_laneq_u32(__p0_84, __p1_84, __p2_84, __p3_84) __extension__ ({ \ + uint32x2_t __s0_84 = __p0_84; \ + uint32x4_t __s2_84 = __p2_84; \ + uint32x2_t __ret_84; \ + __ret_84 = vset_lane_u32(vgetq_lane_u32(__s2_84, __p3_84), __s0_84, __p1_84); \ + __ret_84; \ }) #else -#define vdup_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ - __ret; \ +#define vcopy_laneq_u32(__p0_85, __p1_85, __p2_85, __p3_85) __extension__ ({ \ + uint32x2_t __s0_85 = __p0_85; \ + uint32x4_t __s2_85 = __p2_85; \ + uint32x2_t __rev0_85; __rev0_85 = __builtin_shufflevector(__s0_85, __s0_85, 1, 0); \ + uint32x4_t __rev2_85; __rev2_85 = __builtin_shufflevector(__s2_85, __s2_85, 3, 2, 1, 0); \ + uint32x2_t __ret_85; \ + __ret_85 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_85, 
__p3_85), __rev0_85, __p1_85); \ + __ret_85 = __builtin_shufflevector(__ret_85, __ret_85, 1, 0); \ + __ret_85; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_lane_f16(__p0, __p1) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ - __ret; \ +#define vcopy_laneq_u64(__p0_86, __p1_86, __p2_86, __p3_86) __extension__ ({ \ + uint64x1_t __s0_86 = __p0_86; \ + uint64x2_t __s2_86 = __p2_86; \ + uint64x1_t __ret_86; \ + __ret_86 = vset_lane_u64(vgetq_lane_u64(__s2_86, __p3_86), __s0_86, __p1_86); \ + __ret_86; \ }) #else -#define vdup_lane_f16(__p0, __p1) __extension__ ({ \ - float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ +#define vcopy_laneq_u64(__p0_87, __p1_87, __p2_87, __p3_87) __extension__ ({ \ + uint64x1_t __s0_87 = __p0_87; \ + uint64x2_t __s2_87 = __p2_87; \ + uint64x2_t __rev2_87; __rev2_87 = __builtin_shufflevector(__s2_87, __s2_87, 1, 0); \ + uint64x1_t __ret_87; \ + __ret_87 = __noswap_vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_87, __p3_87), __s0_87, __p1_87); \ + __ret_87; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ - poly8_t __ret; \ - __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ - __ret; \ +#define vcopy_laneq_u16(__p0_88, __p1_88, __p2_88, __p3_88) __extension__ ({ \ + uint16x4_t __s0_88 = __p0_88; \ + uint16x8_t __s2_88 = __p2_88; \ + uint16x4_t __ret_88; \ + __ret_88 = vset_lane_u16(vgetq_lane_u16(__s2_88, __p3_88), __s0_88, __p1_88); \ + __ret_88; \ }) #else -#define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8_t __ret; \ - __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ - __ret; \ +#define vcopy_laneq_u16(__p0_89, __p1_89, __p2_89, __p3_89) __extension__ ({ \ + uint16x4_t __s0_89 = __p0_89; \ + uint16x8_t __s2_89 = __p2_89; \ + uint16x4_t __rev0_89; __rev0_89 = __builtin_shufflevector(__s0_89, __s0_89, 3, 2, 1, 0); \ + uint16x8_t __rev2_89; __rev2_89 = __builtin_shufflevector(__s2_89, __s2_89, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret_89; \ + __ret_89 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_89, __p3_89), __rev0_89, __p1_89); \ + __ret_89 = __builtin_shufflevector(__ret_89, __ret_89, 3, 2, 1, 0); \ + __ret_89; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ - poly16_t __ret; \ - __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ - __ret; \ +#define vcopy_laneq_s8(__p0_90, __p1_90, __p2_90, __p3_90) __extension__ ({ \ + int8x8_t __s0_90 = __p0_90; \ + int8x16_t __s2_90 = __p2_90; \ + int8x8_t __ret_90; \ + __ret_90 = vset_lane_s8(vgetq_lane_s8(__s2_90, __p3_90), __s0_90, __p1_90); \ + __ret_90; \ }) #else -#define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16_t __ret; \ - __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ - __ret; \ +#define vcopy_laneq_s8(__p0_91, __p1_91, __p2_91, __p3_91) 
__extension__ ({ \ + int8x8_t __s0_91 = __p0_91; \ + int8x16_t __s2_91 = __p2_91; \ + int8x8_t __rev0_91; __rev0_91 = __builtin_shufflevector(__s0_91, __s0_91, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_91; __rev2_91 = __builtin_shufflevector(__s2_91, __s2_91, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret_91; \ + __ret_91 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_91, __p3_91), __rev0_91, __p1_91); \ + __ret_91 = __builtin_shufflevector(__ret_91, __ret_91, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_91; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ - __ret; \ +#define vcopy_laneq_f32(__p0_92, __p1_92, __p2_92, __p3_92) __extension__ ({ \ + float32x2_t __s0_92 = __p0_92; \ + float32x4_t __s2_92 = __p2_92; \ + float32x2_t __ret_92; \ + __ret_92 = vset_lane_f32(vgetq_lane_f32(__s2_92, __p3_92), __s0_92, __p1_92); \ + __ret_92; \ }) #else -#define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ - __ret; \ +#define vcopy_laneq_f32(__p0_93, __p1_93, __p2_93, __p3_93) __extension__ ({ \ + float32x2_t __s0_93 = __p0_93; \ + float32x4_t __s2_93 = __p2_93; \ + float32x2_t __rev0_93; __rev0_93 = __builtin_shufflevector(__s0_93, __s0_93, 1, 0); \ + float32x4_t __rev2_93; __rev2_93 = __builtin_shufflevector(__s2_93, __s2_93, 3, 2, 1, 0); \ + float32x2_t __ret_93; \ + __ret_93 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_93, __p3_93), __rev0_93, __p1_93); \ + __ret_93 = __builtin_shufflevector(__ret_93, __ret_93, 1, 0); \ + __ret_93; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__s0, __p1); \ - __ret; \ +#define vcopy_laneq_s32(__p0_94, __p1_94, __p2_94, __p3_94) __extension__ ({ \ + int32x2_t __s0_94 = __p0_94; \ + int32x4_t __s2_94 = __p2_94; \ + int32x2_t __ret_94; \ + __ret_94 = vset_lane_s32(vgetq_lane_s32(__s2_94, __p3_94), __s0_94, __p1_94); \ + __ret_94; \ }) #else -#define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__rev0, __p1); \ - __ret; \ +#define vcopy_laneq_s32(__p0_95, __p1_95, __p2_95, __p3_95) __extension__ ({ \ + int32x2_t __s0_95 = __p0_95; \ + int32x4_t __s2_95 = __p2_95; \ + int32x2_t __rev0_95; __rev0_95 = __builtin_shufflevector(__s0_95, __s0_95, 1, 0); \ + int32x4_t __rev2_95; __rev2_95 = __builtin_shufflevector(__s2_95, __s2_95, 3, 2, 1, 0); \ + int32x2_t __ret_95; \ + __ret_95 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_95, __p3_95), __rev0_95, __p1_95); \ + __ret_95 = __builtin_shufflevector(__ret_95, __ret_95, 1, 0); \ + __ret_95; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__s0, __p1); \ - __ret; \ +#define vcopy_laneq_s64(__p0_96, __p1_96, __p2_96, __p3_96) __extension__ ({ \ + int64x1_t __s0_96 = __p0_96; 
\ + int64x2_t __s2_96 = __p2_96; \ + int64x1_t __ret_96; \ + __ret_96 = vset_lane_s64(vgetq_lane_s64(__s2_96, __p3_96), __s0_96, __p1_96); \ + __ret_96; \ }) #else -#define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__rev0, __p1); \ - __ret; \ +#define vcopy_laneq_s64(__p0_97, __p1_97, __p2_97, __p3_97) __extension__ ({ \ + int64x1_t __s0_97 = __p0_97; \ + int64x2_t __s2_97 = __p2_97; \ + int64x2_t __rev2_97; __rev2_97 = __builtin_shufflevector(__s2_97, __s2_97, 1, 0); \ + int64x1_t __ret_97; \ + __ret_97 = __noswap_vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_97, __p3_97), __s0_97, __p1_97); \ + __ret_97; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ - __ret; \ +#define vcopy_laneq_s16(__p0_98, __p1_98, __p2_98, __p3_98) __extension__ ({ \ + int16x4_t __s0_98 = __p0_98; \ + int16x8_t __s2_98 = __p2_98; \ + int16x4_t __ret_98; \ + __ret_98 = vset_lane_s16(vgetq_lane_s16(__s2_98, __p3_98), __s0_98, __p1_98); \ + __ret_98; \ }) #else -#define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ - __ret; \ +#define vcopy_laneq_s16(__p0_99, __p1_99, __p2_99, __p3_99) __extension__ ({ \ + int16x4_t __s0_99 = __p0_99; \ + int16x8_t __s2_99 = __p2_99; \ + int16x4_t __rev0_99; __rev0_99 = __builtin_shufflevector(__s0_99, __s0_99, 3, 2, 1, 0); \ + int16x8_t __rev2_99; __rev2_99 = __builtin_shufflevector(__s2_99, __s2_99, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_99; \ + __ret_99 = __noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_99, __p3_99), __rev0_99, __p1_99); \ + __ret_99 = __builtin_shufflevector(__ret_99, __ret_99, 3, 2, 1, 0); \ + __ret_99; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ - __ret; \ -}) +__ai poly64x1_t vcreate_p64(uint64_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); + return __ret; +} #else -#define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ - __ret; \ -}) +__ai poly64x1_t vcreate_p64(uint64_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((int8x16_t)__s0, __p1); \ - __ret; \ -}) +__ai float64x1_t vcreate_f64(uint64_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); + return __ret; +} #else -#define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((int8x16_t)__rev0, __p1); \ - 
__ret; \ -}) +__ai float64x1_t vcreate_f64(uint64_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vdups_laneq_f32((int8x16_t)__s0, __p1); \ - __ret; \ -}) +__ai float32_t vcvts_f32_s32(int32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vcvts_f32_s32(__p0); + return __ret; +} #else -#define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vdups_laneq_f32((int8x16_t)__rev0, __p1); \ - __ret; \ -}) +__ai float32_t vcvts_f32_s32(int32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vcvts_f32_s32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__s0, __p1); \ - __ret; \ -}) +__ai float32_t vcvts_f32_u32(uint32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vcvts_f32_u32(__p0); + return __ret; +} #else -#define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__rev0, __p1); \ - __ret; \ -}) +__ai float32_t vcvts_f32_u32(uint32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vcvts_f32_u32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__s0, __p1); \ - __ret; \ -}) +__ai float32x2_t vcvt_f32_f64(float64x2_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__p0, 9); + return __ret; +} #else -#define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__rev0, __p1); \ - __ret; \ -}) +__ai float32x2_t vcvt_f32_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__rev0, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float32x2_t __noswap_vcvt_f32_f64(float64x2_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__p0, 9); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ - __ret; \ -}) +__ai float64_t vcvtd_f64_s64(int64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vcvtd_f64_s64(__p0); + return __ret; +} #else -#define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ - __ret; \ -}) +__ai float64_t 
vcvtd_f64_s64(int64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vcvtd_f64_s64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p8(__p0, __p1) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ - poly8x8_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret; \ -}) +__ai float64_t vcvtd_f64_u64(uint64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vcvtd_f64_u64(__p0); + return __ret; +} #else -#define vdup_laneq_p8(__p0, __p1) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float64_t vcvtd_f64_u64(uint64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vcvtd_f64_u64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ - __ret; \ -}) +__ai float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__p0, 51); + return __ret; +} #else -#define vdup_laneq_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64x1_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1); \ - __ret; \ -}) +__ai float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p16(__p0, __p1) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ - poly16x4_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ - __ret; \ -}) +__ai float64x2_t vcvtq_f64_s64(int64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__p0, 35); + return __ret; +} #else -#define vdup_laneq_p16(__p0, __p1) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x4_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float64x2_t vcvtq_f64_s64(int64x2_t __p0) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__rev0, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p8(__p0, __p1) __extension__ ({ \ - poly8x16_t __s0 = __p0; \ - poly8x16_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret; \ -}) +__ai float64x1_t vcvt_f64_u64(uint64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 19); + return __ret; +} #else -#define vdupq_laneq_p8(__p0, __p1) 
__extension__ ({ \ - poly8x16_t __s0 = __p0; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float64x1_t vcvt_f64_u64(uint64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 19); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ - __ret; \ -}) +__ai float64x1_t vcvt_f64_s64(int64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 3); + return __ret; +} #else -#define vdupq_laneq_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64x2_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai float64x1_t vcvt_f64_s64(int64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 3); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p16(__p0, __p1) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ - poly16x8_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret; \ -}) +__ai float64x2_t vcvt_f64_f32(float32x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__p0, 42); + return __ret; +} #else -#define vdupq_laneq_p16(__p0, __p1) __extension__ ({ \ - poly16x8_t __s0 = __p0; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float64x2_t vcvt_f64_f32(float32x2_t __p0) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__rev0, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float64x2_t __noswap_vcvt_f64_f32(float32x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__p0, 42); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8x16_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret; \ -}) +__ai float16x8_t vcvt_high_f16_f32(float16x4_t __p0, float32x4_t __p1) { + float16x8_t __ret; + __ret = vcombine_f16(__p0, vcvt_f16_f32(__p1)); + return __ret; +} #else -#define vdupq_laneq_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, 
__p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float16x8_t vcvt_high_f16_f32(float16x4_t __p0, float32x4_t __p1) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x8_t __ret; + __ret = __noswap_vcombine_f16(__rev0, __noswap_vcvt_f16_f32(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ - __ret; \ -}) +__ai float32x4_t vcvt_high_f32_f16(float16x8_t __p0) { + float32x4_t __ret; + __ret = vcvt_f32_f16(vget_high_f16(__p0)); + return __ret; +} #else -#define vdupq_laneq_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float32x4_t vcvt_high_f32_f16(float16x8_t __p0) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float32x4_t __ret; + __ret = __noswap_vcvt_f32_f16(__noswap_vget_high_f16(__rev0)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ - __ret; \ -}) +__ai float32x4_t vcvt_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { + float32x4_t __ret; + __ret = vcombine_f32(__p0, vcvt_f32_f64(__p1)); + return __ret; +} #else -#define vdupq_laneq_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai float32x4_t vcvt_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x4_t __ret; + __ret = __noswap_vcombine_f32(__rev0, __noswap_vcvt_f32_f64(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ +__ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { + float64x2_t __ret; + __ret = vcvt_f64_f32(vget_high_f32(__p0)); + return __ret; +} +#else +__ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float64x2_t __ret; + __ret = __noswap_vcvt_f64_f32(__noswap_vget_high_f32(__rev0)); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcvts_n_f32_u32(__p0, __p1) __extension__ ({ \ + uint32_t __s0 = __p0; \ + float32_t __ret; \ + __ret = (float32_t) 
__builtin_neon_vcvts_n_f32_u32(__s0, __p1); \ __ret; \ }) #else -#define vdupq_laneq_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcvts_n_f32_u32(__p0, __p1) __extension__ ({ \ + uint32_t __s0 = __p0; \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vcvts_n_f32_u32(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8x16_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ +#define vcvts_n_f32_s32(__p0, __p1) __extension__ ({ \ + int32_t __s0 = __p0; \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vcvts_n_f32_s32(__s0, __p1); \ __ret; \ }) #else -#define vdupq_laneq_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcvts_n_f32_s32(__p0, __p1) __extension__ ({ \ + int32_t __s0 = __p0; \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vcvts_n_f32_s32(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ +#define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ float64x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ + __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else -#define vdupq_laneq_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ +#define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float64x2_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ + __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f32(__p0, __p1) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ +#define vcvtq_n_f64_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else -#define vdupq_laneq_f32(__p0, __p1) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x4_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vcvtq_n_f64_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 1, 0); \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__rev0, __p1, 35); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f16(__p0, __p1) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ +#define vcvt_n_f64_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #else -#define vdupq_laneq_f16(__p0, __p1) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcvt_n_f64_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ +#define vcvt_n_f64_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #else -#define vdupq_laneq_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vcvt_n_f64_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ +#define vcvtd_n_f64_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vcvtd_n_f64_u64(__s0, __p1); \ __ret; \ }) #else -#define vdupq_laneq_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x2_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vcvtd_n_f64_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vcvtd_n_f64_u64(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ +#define vcvtd_n_f64_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vcvtd_n_f64_s64(__s0, __p1); \ __ret; \ }) #else -#define vdupq_laneq_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - 
int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcvtd_n_f64_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vcvtd_n_f64_s64(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8x8_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ +#define vcvts_n_s32_f32(__p0, __p1) __extension__ ({ \ + float32_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vcvts_n_s32_f32(__s0, __p1); \ __ret; \ }) #else -#define vdup_laneq_u8(__p0, __p1) __extension__ ({ \ - uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcvts_n_s32_f32(__p0, __p1) __extension__ ({ \ + float32_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vcvts_n_s32_f32(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ +#define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vcvtq_n_s64_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else -#define vdup_laneq_u32(__p0, __p1) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x2_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ +#define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64x2_t __ret; \ + __ret = (int64x2_t) __builtin_neon_vcvtq_n_s64_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ +#define vcvt_n_s64_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vcvt_n_s64_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #else -#define vdup_laneq_u64(__p0, __p1) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x1_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1); \ +#define vcvt_n_s64_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = (int64x1_t) __builtin_neon_vcvt_n_s64_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x4_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ +#define vcvtd_n_s64_f64(__p0, __p1) __extension__ ({ \ + 
float64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vcvtd_n_s64_f64(__s0, __p1); \ __ret; \ }) #else -#define vdup_laneq_u16(__p0, __p1) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vcvtd_n_s64_f64(__p0, __p1) __extension__ ({ \ + float64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vcvtd_n_s64_f64(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8x8_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ +#define vcvts_n_u32_f32(__p0, __p1) __extension__ ({ \ + float32_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vcvts_n_u32_f32(__s0, __p1); \ __ret; \ }) #else -#define vdup_laneq_s8(__p0, __p1) __extension__ ({ \ - int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcvts_n_u32_f32(__p0, __p1) __extension__ ({ \ + float32_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vcvts_n_u32_f32(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f64(__p0, __p1) __extension__ ({ \ +#define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ float64x2_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vcvtq_n_u64_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else -#define vdup_laneq_f64(__p0, __p1) __extension__ ({ \ +#define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x1_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1); \ + uint64x2_t __ret; \ + __ret = (uint64x2_t) __builtin_neon_vcvtq_n_u64_v((int8x16_t)__rev0, __p1, 51); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f32(__p0, __p1) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ +#define vcvt_n_u64_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vcvt_n_u64_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #else -#define vdup_laneq_f32(__p0, __p1) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x2_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vcvt_n_u64_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + uint64x1_t __ret; \ + __ret = (uint64x1_t) __builtin_neon_vcvt_n_u64_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f16(__p0, __p1) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x4_t __ret; \ - __ret = 
__builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ +#define vcvtd_n_u64_f64(__p0, __p1) __extension__ ({ \ + float64_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vcvtd_n_u64_f64(__s0, __p1); \ __ret; \ }) #else -#define vdup_laneq_f16(__p0, __p1) __extension__ ({ \ - float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ +#define vcvtd_n_u64_f64(__p0, __p1) __extension__ ({ \ + float64_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vcvtd_n_u64_f64(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x2_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ - __ret; \ -}) +__ai int32_t vcvts_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvts_s32_f32(__p0); + return __ret; +} #else -#define vdup_laneq_s32(__p0, __p1) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x2_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai int32_t vcvts_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvts_s32_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ - __ret; \ -}) +__ai int64_t vcvtd_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtd_s64_f64(__p0); + return __ret; +} #else -#define vdup_laneq_s64(__p0, __p1) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x1_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1); \ - __ret; \ -}) +__ai int64_t vcvtd_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtd_s64_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x4_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ - __ret; \ -}) +__ai int64x2_t vcvtq_s64_f64(float64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vcvtq_s64_v((int8x16_t)__p0, 35); + return __ret; +} #else -#define vdup_laneq_s16(__p0, __p1) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int64x2_t vcvtq_s64_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vcvtq_s64_v((int8x16_t)__rev0, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x1_t vdup_n_p64(poly64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t) {__p0}; +__ai int64x1_t vcvt_s64_f64(float64x1_t __p0) { + int64x1_t __ret; + 
__ret = (int64x1_t) __builtin_neon_vcvt_s64_v((int8x8_t)__p0, 3); return __ret; } #else -__ai poly64x1_t vdup_n_p64(poly64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t) {__p0}; +__ai int64x1_t vcvt_s64_f64(float64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vcvt_s64_v((int8x8_t)__p0, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vdupq_n_p64(poly64_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t) {__p0, __p0}; +__ai uint32_t vcvts_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvts_u32_f32(__p0); return __ret; } #else -__ai poly64x2_t vdupq_n_p64(poly64_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint32_t vcvts_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvts_u32_f32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vdupq_n_f64(float64_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) {__p0, __p0}; +__ai uint64_t vcvtd_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtd_u64_f64(__p0); return __ret; } #else -__ai float64x2_t vdupq_n_f64(float64_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint64_t vcvtd_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtd_u64_f64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vdup_n_f64(float64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) {__p0}; +__ai uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcvtq_u64_v((int8x16_t)__p0, 51); return __ret; } #else -__ai float64x1_t vdup_n_f64(float64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) {__p0}; +__ai uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __ret; + __ret = (uint64x2_t) __builtin_neon_vcvtq_u64_v((int8x16_t)__rev0, 51); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vext_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) +__ai uint64x1_t vcvt_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcvt_u64_v((int8x8_t)__p0, 19); + return __ret; +} #else -#define vext_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) +__ai uint64x1_t vcvt_u64_f64(float64x1_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t) __builtin_neon_vcvt_u64_v((int8x8_t)__p0, 19); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vextq_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ - __ret; \ -}) +__ai int32_t vcvtas_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvtas_s32_f32(__p0); + return __ret; +} #else -#define vextq_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - 
poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai int32_t vcvtas_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvtas_s32_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vextq_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 42); \ - __ret; \ -}) +__ai int64_t vcvtad_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtad_s64_f64(__p0); + return __ret; +} #else -#define vextq_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai int64_t vcvtad_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtad_s64_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vext_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ - __ret; \ -}) +__ai uint32_t vcvtas_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvtas_u32_f32(__p0); + return __ret; +} #else -#define vext_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ - __ret; \ -}) +__ai uint32_t vcvtas_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvtas_u32_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); +__ai uint64_t vcvtad_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtad_u64_f64(__p0); return __ret; } #else -__ai float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -__ai float64x2_t __noswap_vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); +__ai uint64_t vcvtad_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtad_u64_f64(__p0); return 
__ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); +__ai int32_t vcvtms_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvtms_s32_f32(__p0); return __ret; } #else -__ai float64x1_t vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); - return __ret; -} -__ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); +__ai int32_t vcvtms_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvtms_s32_f32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) +__ai int64_t vcvtmd_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtmd_s64_f64(__p0); + return __ret; +} #else -#define vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) -#define __noswap_vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) +__ai int64_t vcvtmd_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtmd_s64_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) +__ai uint32_t vcvtms_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvtms_u32_f32(__p0); + return __ret; +} #else -#define vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__rev2, __p3); \ - __ret; \ -}) -#define __noswap_vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) +__ai uint32_t vcvtms_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvtms_u32_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64x2_t __ret; \ 
- __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 42); \ - __ret; \ -}) +__ai uint64_t vcvtmd_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtmd_u64_f64(__p0); + return __ret; +} #else -#define vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__s2, __p3, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 42); \ - __ret; \ -}) +__ai uint64_t vcvtmd_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtmd_u64_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 41); \ - __ret; \ -}) +__ai int32_t vcvtns_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvtns_s32_f32(__p0); + return __ret; +} #else -#define vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, __p3, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 41); \ - __ret; \ -}) +__ai int32_t vcvtns_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvtns_s32_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ - __ret; \ -}) +__ai int64_t vcvtnd_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtnd_s64_f64(__p0); + return __ret; +} #else -#define vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, 
(int8x8_t)__s2, __p3, 10); \ - __ret; \ -}) -#define __noswap_vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ - __ret; \ -}) +__ai int64_t vcvtnd_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtnd_s64_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 9); \ - __ret; \ -}) +__ai uint32_t vcvtns_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvtns_u32_f32(__p0); + return __ret; +} #else -#define vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, __p3, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x2_t __s2 = __p2; \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 9); \ - __ret; \ -}) +__ai uint32_t vcvtns_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvtns_u32_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (int8x16_t)__s2, __p3); \ - __ret; \ -}) +__ai uint64_t vcvtnd_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtnd_u64_f64(__p0); + return __ret; +} #else -#define vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (int8x16_t)__rev2, __p3); \ - __ret; \ -}) -#define __noswap_vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (int8x16_t)__s2, __p3); \ - __ret; \ -}) +__ai uint64_t vcvtnd_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtnd_u64_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, 
(int8x16_t)__s2, __p3); \ - __ret; \ -}) +__ai int32_t vcvtps_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvtps_s32_f32(__p0); + return __ret; +} #else -#define vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (int8x16_t)__rev2, __p3); \ - __ret; \ -}) -#define __noswap_vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32_t __s0 = __p0; \ - float32_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (int8x16_t)__s2, __p3); \ - __ret; \ -}) +__ai int32_t vcvtps_s32_f32(float32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vcvtps_s32_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 42); \ - __ret; \ -}) +__ai int64_t vcvtpd_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtpd_s64_f64(__p0); + return __ret; +} #else -#define vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 42); \ - __ret; \ -}) +__ai int64_t vcvtpd_s64_f64(float64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vcvtpd_s64_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 41); \ - __ret; \ -}) +__ai uint32_t vcvtps_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvtps_u32_f32(__p0); + return __ret; +} #else -#define vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 
41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#define __noswap_vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 41); \ - __ret; \ -}) +__ai uint32_t vcvtps_u32_f32(float32_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vcvtps_u32_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 10); \ - __ret; \ -}) +__ai uint64_t vcvtpd_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtpd_u64_f64(__p0); + return __ret; +} #else -#define vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__rev2, __p3, 10); \ - __ret; \ -}) -#define __noswap_vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x2_t __s2 = __p2; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 10); \ - __ret; \ -}) +__ai uint64_t vcvtpd_u64_f64(float64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcvtpd_u64_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 9); \ - __ret; \ -}) +__ai float32_t vcvtxd_f32_f64(float64_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vcvtxd_f32_f64(__p0); + return __ret; +} #else -#define vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x16_t)__rev2, __p3, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 9); \ - __ret; \ -}) +__ai float32_t vcvtxd_f32_f64(float64_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vcvtxd_f32_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vfmaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - 
float64x2_t __ret; - __ret = vfmaq_f64(__p0, __p1, (float64x2_t) {__p2, __p2}); +__ai float32x2_t vcvtx_f32_f64(float64x2_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__p0, 42); return __ret; } #else -__ai float64x2_t vfmaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { +__ai float32x2_t vcvtx_f32_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __noswap_vfmaq_f64(__rev0, __rev1, (float64x2_t) {__p2, __p2}); + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } +__ai float32x2_t __noswap_vcvtx_f32_f64(float64x2_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__p0, 42); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vfmaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { +__ai float32x4_t vcvtx_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { float32x4_t __ret; - __ret = vfmaq_f32(__p0, __p1, (float32x4_t) {__p2, __p2, __p2, __p2}); + __ret = vcombine_f32(__p0, vcvtx_f32_f64(__p1)); return __ret; } #else -__ai float32x4_t vfmaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); +__ai float32x4_t vcvtx_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x4_t __ret; - __ret = __noswap_vfmaq_f32(__rev0, __rev1, (float32x4_t) {__p2, __p2, __p2, __p2}); + __ret = __noswap_vcombine_f32(__rev0, __noswap_vcvtx_f32_f64(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vfma_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { - float64x1_t __ret; - __ret = vfma_f64(__p0, __p1, (float64x1_t) {__p2}); +__ai float64x2_t vdivq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + __ret = __p0 / __p1; return __ret; } #else -__ai float64x1_t vfma_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { - float64x1_t __ret; - __ret = __noswap_vfma_f64(__p0, __p1, (float64x1_t) {__p2}); +__ai float64x2_t vdivq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __ret; + __ret = __rev0 / __rev1; + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vfma_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { - float32x2_t __ret; - __ret = vfma_f32(__p0, __p1, (float32x2_t) {__p2, __p2}); +__ai float32x4_t vdivq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; + __ret = __p0 / __p1; return __ret; -} -#else -__ai float32x2_t vfma_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __ret; - __ret = __noswap_vfma_f32(__rev0, __rev1, (float32x2_t) {__p2, __p2}); - __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); +} +#else +__ai float32x4_t vdivq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __ret; + __ret = __rev0 / __rev1; + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = vfmaq_f64(__p0, -__p1, __p2); +__ai float64x1_t vdiv_f64(float64x1_t __p0, float64x1_t __p1) { + float64x1_t __ret; + __ret = __p0 / __p1; return __ret; } #else -__ai float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - float64x2_t __ret; - __ret = __noswap_vfmaq_f64(__rev0, -__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64x1_t vdiv_f64(float64x1_t __p0, float64x1_t __p1) { + float64x1_t __ret; + __ret = __p0 / __p1; return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vfms_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = vfma_f64(__p0, -__p1, __p2); +__ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { + float32x2_t __ret; + __ret = __p0 / __p1; return __ret; } #else -__ai float64x1_t vfms_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = __noswap_vfma_f64(__p0, -__p1, __p2); +__ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __ret; + __ret = __rev0 / __rev1; + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsd_lane_f64(__p0_100, __p1_100, __p2_100, __p3_100) __extension__ ({ \ - float64_t __s0_100 = __p0_100; \ - float64_t __s1_100 = __p1_100; \ - float64x1_t __s2_100 = __p2_100; \ - float64_t __ret_100; \ - __ret_100 = vfmad_lane_f64(__s0_100, -__s1_100, __s2_100, __p3_100); \ - __ret_100; \ +#define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ + poly8x8_t __s0 = __p0; \ + poly8_t __ret; \ + __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfmsd_lane_f64(__p0_101, __p1_101, __p2_101, __p3_101) __extension__ ({ \ - float64_t __s0_101 = __p0_101; \ - float64_t __s1_101 = __p1_101; \ - float64x1_t __s2_101 = __p2_101; \ - float64_t __ret_101; \ - __ret_101 = __noswap_vfmad_lane_f64(__s0_101, -__s1_101, __s2_101, __p3_101); \ - __ret_101; \ +#define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ + poly8x8_t __s0 = __p0; \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8_t __ret; \ + __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmss_lane_f32(__p0_102, __p1_102, __p2_102, __p3_102) __extension__ ({ \ - float32_t __s0_102 = __p0_102; \ - float32_t __s1_102 = __p1_102; \ - float32x2_t __s2_102 = __p2_102; \ - float32_t __ret_102; \ - __ret_102 = vfmas_lane_f32(__s0_102, -__s1_102, __s2_102, __p3_102); \ - __ret_102; \ +#define vduph_lane_p16(__p0, 
__p1) __extension__ ({ \ + poly16x4_t __s0 = __p0; \ + poly16_t __ret; \ + __ret = (poly16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfmss_lane_f32(__p0_103, __p1_103, __p2_103, __p3_103) __extension__ ({ \ - float32_t __s0_103 = __p0_103; \ - float32_t __s1_103 = __p1_103; \ - float32x2_t __s2_103 = __p2_103; \ - float32x2_t __rev2_103; __rev2_103 = __builtin_shufflevector(__s2_103, __s2_103, 1, 0); \ - float32_t __ret_103; \ - __ret_103 = __noswap_vfmas_lane_f32(__s0_103, -__s1_103, __rev2_103, __p3_103); \ - __ret_103; \ +#define vduph_lane_p16(__p0, __p1) __extension__ ({ \ + poly16x4_t __s0 = __p0; \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + poly16_t __ret; \ + __ret = (poly16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f64(__p0_104, __p1_104, __p2_104, __p3_104) __extension__ ({ \ - float64x2_t __s0_104 = __p0_104; \ - float64x2_t __s1_104 = __p1_104; \ - float64x1_t __s2_104 = __p2_104; \ - float64x2_t __ret_104; \ - __ret_104 = vfmaq_lane_f64(__s0_104, -__s1_104, __s2_104, __p3_104); \ - __ret_104; \ +#define vdupb_lane_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfmsq_lane_f64(__p0_105, __p1_105, __p2_105, __p3_105) __extension__ ({ \ - float64x2_t __s0_105 = __p0_105; \ - float64x2_t __s1_105 = __p1_105; \ - float64x1_t __s2_105 = __p2_105; \ - float64x2_t __rev0_105; __rev0_105 = __builtin_shufflevector(__s0_105, __s0_105, 1, 0); \ - float64x2_t __rev1_105; __rev1_105 = __builtin_shufflevector(__s1_105, __s1_105, 1, 0); \ - float64x2_t __ret_105; \ - __ret_105 = __noswap_vfmaq_lane_f64(__rev0_105, -__rev1_105, __s2_105, __p3_105); \ - __ret_105 = __builtin_shufflevector(__ret_105, __ret_105, 1, 0); \ - __ret_105; \ +#define vdupb_lane_u8(__p0, __p1) __extension__ ({ \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f32(__p0_106, __p1_106, __p2_106, __p3_106) __extension__ ({ \ - float32x4_t __s0_106 = __p0_106; \ - float32x4_t __s1_106 = __p1_106; \ - float32x2_t __s2_106 = __p2_106; \ - float32x4_t __ret_106; \ - __ret_106 = vfmaq_lane_f32(__s0_106, -__s1_106, __s2_106, __p3_106); \ - __ret_106; \ +#define vdups_lane_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfmsq_lane_f32(__p0_107, __p1_107, __p2_107, __p3_107) __extension__ ({ \ - float32x4_t __s0_107 = __p0_107; \ - float32x4_t __s1_107 = __p1_107; \ - float32x2_t __s2_107 = __p2_107; \ - float32x4_t __rev0_107; __rev0_107 = __builtin_shufflevector(__s0_107, __s0_107, 3, 2, 1, 0); \ - float32x4_t __rev1_107; __rev1_107 = __builtin_shufflevector(__s1_107, __s1_107, 3, 2, 1, 0); \ - float32x2_t __rev2_107; __rev2_107 = __builtin_shufflevector(__s2_107, __s2_107, 1, 0); \ - float32x4_t __ret_107; \ - __ret_107 = __noswap_vfmaq_lane_f32(__rev0_107, -__rev1_107, __rev2_107, __p3_107); \ - __ret_107 = __builtin_shufflevector(__ret_107, __ret_107, 3, 2, 1, 0); \ - __ret_107; \ +#define vdups_lane_u32(__p0, __p1) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + 
uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f64(__p0_108, __p1_108, __p2_108, __p3_108) __extension__ ({ \ - float64x1_t __s0_108 = __p0_108; \ - float64x1_t __s1_108 = __p1_108; \ - float64x1_t __s2_108 = __p2_108; \ - float64x1_t __ret_108; \ - __ret_108 = vfma_lane_f64(__s0_108, -__s1_108, __s2_108, __p3_108); \ - __ret_108; \ +#define vdupd_lane_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfms_lane_f64(__p0_109, __p1_109, __p2_109, __p3_109) __extension__ ({ \ - float64x1_t __s0_109 = __p0_109; \ - float64x1_t __s1_109 = __p1_109; \ - float64x1_t __s2_109 = __p2_109; \ - float64x1_t __ret_109; \ - __ret_109 = __noswap_vfma_lane_f64(__s0_109, -__s1_109, __s2_109, __p3_109); \ - __ret_109; \ +#define vdupd_lane_u64(__p0, __p1) __extension__ ({ \ + uint64x1_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f32(__p0_110, __p1_110, __p2_110, __p3_110) __extension__ ({ \ - float32x2_t __s0_110 = __p0_110; \ - float32x2_t __s1_110 = __p1_110; \ - float32x2_t __s2_110 = __p2_110; \ - float32x2_t __ret_110; \ - __ret_110 = vfma_lane_f32(__s0_110, -__s1_110, __s2_110, __p3_110); \ - __ret_110; \ +#define vduph_lane_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfms_lane_f32(__p0_111, __p1_111, __p2_111, __p3_111) __extension__ ({ \ - float32x2_t __s0_111 = __p0_111; \ - float32x2_t __s1_111 = __p1_111; \ - float32x2_t __s2_111 = __p2_111; \ - float32x2_t __rev0_111; __rev0_111 = __builtin_shufflevector(__s0_111, __s0_111, 1, 0); \ - float32x2_t __rev1_111; __rev1_111 = __builtin_shufflevector(__s1_111, __s1_111, 1, 0); \ - float32x2_t __rev2_111; __rev2_111 = __builtin_shufflevector(__s2_111, __s2_111, 1, 0); \ - float32x2_t __ret_111; \ - __ret_111 = __noswap_vfma_lane_f32(__rev0_111, -__rev1_111, __rev2_111, __p3_111); \ - __ret_111 = __builtin_shufflevector(__ret_111, __ret_111, 1, 0); \ - __ret_111; \ +#define vduph_lane_u16(__p0, __p1) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsd_laneq_f64(__p0_112, __p1_112, __p2_112, __p3_112) __extension__ ({ \ - float64_t __s0_112 = __p0_112; \ - float64_t __s1_112 = __p1_112; \ - float64x2_t __s2_112 = __p2_112; \ - float64_t __ret_112; \ - __ret_112 = vfmad_laneq_f64(__s0_112, -__s1_112, __s2_112, __p3_112); \ - __ret_112; \ +#define vdupb_lane_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8_t __ret; \ + __ret = (int8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfmsd_laneq_f64(__p0_113, __p1_113, __p2_113, __p3_113) __extension__ ({ \ - float64_t __s0_113 = __p0_113; \ - float64_t __s1_113 = __p1_113; \ - float64x2_t __s2_113 = __p2_113; \ - float64x2_t __rev2_113; __rev2_113 = __builtin_shufflevector(__s2_113, __s2_113, 1, 0); \ - float64_t __ret_113; \ - 
__ret_113 = __noswap_vfmad_laneq_f64(__s0_113, -__s1_113, __rev2_113, __p3_113); \ - __ret_113; \ +#define vdupb_lane_s8(__p0, __p1) __extension__ ({ \ + int8x8_t __s0 = __p0; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8_t __ret; \ + __ret = (int8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmss_laneq_f32(__p0_114, __p1_114, __p2_114, __p3_114) __extension__ ({ \ - float32_t __s0_114 = __p0_114; \ - float32_t __s1_114 = __p1_114; \ - float32x4_t __s2_114 = __p2_114; \ - float32_t __ret_114; \ - __ret_114 = vfmas_laneq_f32(__s0_114, -__s1_114, __s2_114, __p3_114); \ - __ret_114; \ +#define vdupd_lane_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vdupd_lane_f64((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfmss_laneq_f32(__p0_115, __p1_115, __p2_115, __p3_115) __extension__ ({ \ - float32_t __s0_115 = __p0_115; \ - float32_t __s1_115 = __p1_115; \ - float32x4_t __s2_115 = __p2_115; \ - float32x4_t __rev2_115; __rev2_115 = __builtin_shufflevector(__s2_115, __s2_115, 3, 2, 1, 0); \ - float32_t __ret_115; \ - __ret_115 = __noswap_vfmas_laneq_f32(__s0_115, -__s1_115, __rev2_115, __p3_115); \ - __ret_115; \ +#define vdupd_lane_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vdupd_lane_f64((int8x8_t)__s0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f64(__p0_116, __p1_116, __p2_116, __p3_116) __extension__ ({ \ - float64x2_t __s0_116 = __p0_116; \ - float64x2_t __s1_116 = __p1_116; \ - float64x2_t __s2_116 = __p2_116; \ - float64x2_t __ret_116; \ - __ret_116 = vfmaq_laneq_f64(__s0_116, -__s1_116, __s2_116, __p3_116); \ - __ret_116; \ +#define vdups_lane_f32(__p0, __p1) __extension__ ({ \ + float32x2_t __s0 = __p0; \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vdups_lane_f32((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfmsq_laneq_f64(__p0_117, __p1_117, __p2_117, __p3_117) __extension__ ({ \ - float64x2_t __s0_117 = __p0_117; \ - float64x2_t __s1_117 = __p1_117; \ - float64x2_t __s2_117 = __p2_117; \ - float64x2_t __rev0_117; __rev0_117 = __builtin_shufflevector(__s0_117, __s0_117, 1, 0); \ - float64x2_t __rev1_117; __rev1_117 = __builtin_shufflevector(__s1_117, __s1_117, 1, 0); \ - float64x2_t __rev2_117; __rev2_117 = __builtin_shufflevector(__s2_117, __s2_117, 1, 0); \ - float64x2_t __ret_117; \ - __ret_117 = __noswap_vfmaq_laneq_f64(__rev0_117, -__rev1_117, __rev2_117, __p3_117); \ - __ret_117 = __builtin_shufflevector(__ret_117, __ret_117, 1, 0); \ - __ret_117; \ +#define vdups_lane_f32(__p0, __p1) __extension__ ({ \ + float32x2_t __s0 = __p0; \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vdups_lane_f32((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f32(__p0_118, __p1_118, __p2_118, __p3_118) __extension__ ({ \ - float32x4_t __s0_118 = __p0_118; \ - float32x4_t __s1_118 = __p1_118; \ - float32x4_t __s2_118 = __p2_118; \ - float32x4_t __ret_118; \ - __ret_118 = vfmaq_laneq_f32(__s0_118, -__s1_118, __s2_118, __p3_118); \ - __ret_118; \ +#define vdups_lane_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__s0, __p1); \ + __ret; \ }) #else 
-#define vfmsq_laneq_f32(__p0_119, __p1_119, __p2_119, __p3_119) __extension__ ({ \ - float32x4_t __s0_119 = __p0_119; \ - float32x4_t __s1_119 = __p1_119; \ - float32x4_t __s2_119 = __p2_119; \ - float32x4_t __rev0_119; __rev0_119 = __builtin_shufflevector(__s0_119, __s0_119, 3, 2, 1, 0); \ - float32x4_t __rev1_119; __rev1_119 = __builtin_shufflevector(__s1_119, __s1_119, 3, 2, 1, 0); \ - float32x4_t __rev2_119; __rev2_119 = __builtin_shufflevector(__s2_119, __s2_119, 3, 2, 1, 0); \ - float32x4_t __ret_119; \ - __ret_119 = __noswap_vfmaq_laneq_f32(__rev0_119, -__rev1_119, __rev2_119, __p3_119); \ - __ret_119 = __builtin_shufflevector(__ret_119, __ret_119, 3, 2, 1, 0); \ - __ret_119; \ +#define vdups_lane_s32(__p0, __p1) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f64(__p0_120, __p1_120, __p2_120, __p3_120) __extension__ ({ \ - float64x1_t __s0_120 = __p0_120; \ - float64x1_t __s1_120 = __p1_120; \ - float64x2_t __s2_120 = __p2_120; \ - float64x1_t __ret_120; \ - __ret_120 = vfma_laneq_f64(__s0_120, -__s1_120, __s2_120, __p3_120); \ - __ret_120; \ +#define vdupd_lane_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfms_laneq_f64(__p0_121, __p1_121, __p2_121, __p3_121) __extension__ ({ \ - float64x1_t __s0_121 = __p0_121; \ - float64x1_t __s1_121 = __p1_121; \ - float64x2_t __s2_121 = __p2_121; \ - float64x2_t __rev2_121; __rev2_121 = __builtin_shufflevector(__s2_121, __s2_121, 1, 0); \ - float64x1_t __ret_121; \ - __ret_121 = __noswap_vfma_laneq_f64(__s0_121, -__s1_121, __rev2_121, __p3_121); \ - __ret_121; \ +#define vdupd_lane_s64(__p0, __p1) __extension__ ({ \ + int64x1_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f32(__p0_122, __p1_122, __p2_122, __p3_122) __extension__ ({ \ - float32x2_t __s0_122 = __p0_122; \ - float32x2_t __s1_122 = __p1_122; \ - float32x4_t __s2_122 = __p2_122; \ - float32x2_t __ret_122; \ - __ret_122 = vfma_laneq_f32(__s0_122, -__s1_122, __s2_122, __p3_122); \ - __ret_122; \ +#define vduph_lane_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16_t __ret; \ + __ret = (int16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ + __ret; \ }) #else -#define vfms_laneq_f32(__p0_123, __p1_123, __p2_123, __p3_123) __extension__ ({ \ - float32x2_t __s0_123 = __p0_123; \ - float32x2_t __s1_123 = __p1_123; \ - float32x4_t __s2_123 = __p2_123; \ - float32x2_t __rev0_123; __rev0_123 = __builtin_shufflevector(__s0_123, __s0_123, 1, 0); \ - float32x2_t __rev1_123; __rev1_123 = __builtin_shufflevector(__s1_123, __s1_123, 1, 0); \ - float32x4_t __rev2_123; __rev2_123 = __builtin_shufflevector(__s2_123, __s2_123, 3, 2, 1, 0); \ - float32x2_t __ret_123; \ - __ret_123 = __noswap_vfma_laneq_f32(__rev0_123, -__rev1_123, __rev2_123, __p3_123); \ - __ret_123 = __builtin_shufflevector(__ret_123, __ret_123, 1, 0); \ - __ret_123; \ +#define vduph_lane_s16(__p0, __p1) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16_t __ret; \ + __ret = (int16_t) 
__builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vfmsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __ret; - __ret = vfmaq_f64(__p0, -__p1, (float64x2_t) {__p2, __p2}); - return __ret; -} -#else -__ai float64x2_t vfmsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __noswap_vfmaq_f64(__rev0, -__rev1, (float64x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vfmsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { - float32x4_t __ret; - __ret = vfmaq_f32(__p0, -__p1, (float32x4_t) {__p2, __p2, __p2, __p2}); - return __ret; -} -#else -__ai float32x4_t vfmsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __ret; - __ret = __noswap_vfmaq_f32(__rev0, -__rev1, (float32x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vfms_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { - float64x1_t __ret; - __ret = vfma_f64(__p0, -__p1, (float64x1_t) {__p2}); - return __ret; -} -#else -__ai float64x1_t vfms_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { - float64x1_t __ret; - __ret = __noswap_vfma_f64(__p0, -__p1, (float64x1_t) {__p2}); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vfms_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { - float32x2_t __ret; - __ret = vfma_f32(__p0, -__p1, (float32x2_t) {__p2, __p2}); - return __ret; -} -#else -__ai float32x2_t vfms_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __ret; - __ret = __noswap_vfma_f32(__rev0, -__rev1, (float32x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly64x1_t vget_high_p64(poly64x2_t __p0) { - poly64x1_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1); - return __ret; -} -#else -__ai poly64x1_t vget_high_p64(poly64x2_t __p0) { - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x1_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1); - return __ret; -} -__ai poly64x1_t __noswap_vget_high_p64(poly64x2_t __p0) { - poly64x1_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vget_high_f64(float64x2_t __p0) { - float64x1_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 1); - return __ret; -} +#define vdup_lane_p64(__p0, __p1) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64x1_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1); \ + __ret; \ +}) #else -__ai float64x1_t vget_high_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x1_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 1); - return __ret; -} +#define 
vdup_lane_p64(__p0, __p1) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64x1_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vget_lane_p64(__p0, __p1) __extension__ ({ \ +#define vdupq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __s0 = __p0; \ - poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ + poly64x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vget_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#define __noswap_vget_lane_p64(__p0, __p1) __extension__ ({ \ +#define vdupq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __s0 = __p0; \ - poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ + poly64x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \ +#define vdupq_lane_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__rev0, __p1); \ - __ret; \ -}) -#define __noswap_vgetq_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \ +#define vdupq_lane_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__s0, __p1); \ +#define vdupq_lane_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x8_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__rev0, __p1); \ - __ret; \ -}) -#define __noswap_vgetq_lane_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__s0, __p1); \ +#define vdupq_lane_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vget_lane_f64(__p0, 
__p1) __extension__ ({ \ +#define vdup_lane_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ + float64x1_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1); \ __ret; \ }) #else -#define vget_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#define __noswap_vget_lane_f64(__p0, __p1) __extension__ ({ \ +#define vdup_lane_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ + float64x1_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x1_t vget_low_p64(poly64x2_t __p0) { - poly64x1_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 0); - return __ret; -} +#define vdup_lane_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ + __ret; \ +}) #else -__ai poly64x1_t vget_low_p64(poly64x2_t __p0) { - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x1_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 0); - return __ret; -} +#define vdup_lane_f16(__p0, __p1) __extension__ ({ \ + float16x4_t __s0 = __p0; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vget_low_f64(float64x2_t __p0) { - float64x1_t __ret; - __ret = __builtin_shufflevector(__p0, __p0, 0); - return __ret; -} +#define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ + poly8x16_t __s0 = __p0; \ + poly8_t __ret; \ + __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ + __ret; \ +}) #else -__ai float64x1_t vget_low_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x1_t __ret; - __ret = __builtin_shufflevector(__rev0, __rev0, 0); - return __ret; -} +#define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ + poly8x16_t __s0 = __p0; \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8_t __ret; \ + __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p64(__p0) __extension__ ({ \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_v(__p0, 6); \ +#define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ + poly16x8_t __s0 = __p0; \ + poly16_t __ret; \ + __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1_p64(__p0) __extension__ ({ \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_v(__p0, 6); \ +#define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ + poly16x8_t __s0 = __p0; \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16_t __ret; \ + __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p64(__p0) __extension__ ({ \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) 
__builtin_neon_vld1q_v(__p0, 38); \ +#define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_p64(__p0) __extension__ ({ \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vld1q_v(__p0, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f64(__p0) __extension__ ({ \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_v(__p0, 42); \ +#define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_f64(__p0) __extension__ ({ \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_v(__p0, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f64(__p0) __extension__ ({ \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_v(__p0, 10); \ +#define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1_f64(__p0) __extension__ ({ \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_v(__p0, 10); \ +#define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_dup_p64(__p0) __extension__ ({ \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_dup_v(__p0, 6); \ +#define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1_dup_p64(__p0) __extension__ ({ \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_dup_v(__p0, 6); \ +#define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_dup_p64(__p0) __extension__ ({ \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vld1q_dup_v(__p0, 38); \ +#define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8_t __ret; \ + __ret = (int8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_dup_p64(__p0) __extension__ ({ \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) 
__builtin_neon_vld1q_dup_v(__p0, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8_t __ret; \ + __ret = (int8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_dup_f64(__p0) __extension__ ({ \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_dup_v(__p0, 42); \ +#define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_dup_f64(__p0) __extension__ ({ \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_dup_v(__p0, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_dup_f64(__p0) __extension__ ({ \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_dup_v(__p0, 10); \ +#define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vdups_laneq_f32((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1_dup_f64(__p0) __extension__ ({ \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_dup_v(__p0, 10); \ +#define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vdups_laneq_f32((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ +#define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ +#define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 38); \ +#define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) 
__builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s1 = __p1; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 42); \ +#define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16_t __ret; \ + __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret; \ + __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ +#define vdup_laneq_p8(__p0, __p1) __extension__ ({ \ + poly8x16_t __s0 = __p0; \ + poly8x8_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ +#define vdup_laneq_p8(__p0, __p1) __extension__ ({ \ + poly8x16_t __s0 = __p0; \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p8_x2(__p0) __extension__ ({ \ - poly8x8x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 4); \ +#define vdup_laneq_p64(__p0, __p1) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x1_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1); \ __ret; \ }) #else -#define vld1_p8_x2(__p0) __extension__ ({ \ - poly8x8x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 4); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdup_laneq_p64(__p0, __p1) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + poly64x1_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p64_x2(__p0) __extension__ ({ \ - poly64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 6); \ +#define 
vdup_laneq_p16(__p0, __p1) __extension__ ({ \ + poly16x8_t __s0 = __p0; \ + poly16x4_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1_p64_x2(__p0) __extension__ ({ \ - poly64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 6); \ +#define vdup_laneq_p16(__p0, __p1) __extension__ ({ \ + poly16x8_t __s0 = __p0; \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p16_x2(__p0) __extension__ ({ \ - poly16x4x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 5); \ +#define vdupq_laneq_p8(__p0, __p1) __extension__ ({ \ + poly8x16_t __s0 = __p0; \ + poly8x16_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1_p16_x2(__p0) __extension__ ({ \ - poly16x4x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 5); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vdupq_laneq_p8(__p0, __p1) __extension__ ({ \ + poly8x16_t __s0 = __p0; \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p8_x2(__p0) __extension__ ({ \ - poly8x16x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 36); \ +#define vdupq_laneq_p64(__p0, __p1) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vld1q_p8_x2(__p0) __extension__ ({ \ - poly8x16x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 36); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_p64(__p0, __p1) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + poly64x2_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p64_x2(__p0) __extension__ ({ \ - poly64x2x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 38); \ +#define vdupq_laneq_p16(__p0, __p1) __extension__ ({ \ + poly16x8_t __s0 = __p0; \ + poly16x8_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_p64_x2(__p0) __extension__ ({ \ - poly64x2x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 38); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define 
vdupq_laneq_p16(__p0, __p1) __extension__ ({ \ + poly16x8_t __s0 = __p0; \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p16_x2(__p0) __extension__ ({ \ - poly16x8x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 37); \ +#define vdupq_laneq_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_p16_x2(__p0) __extension__ ({ \ - poly16x8x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 37); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u8_x2(__p0) __extension__ ({ \ - uint8x16x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 48); \ +#define vdupq_laneq_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_u8_x2(__p0) __extension__ ({ \ - uint8x16x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 48); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x4_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u32_x2(__p0) __extension__ ({ \ - uint32x4x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 50); \ +#define vdupq_laneq_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vld1q_u32_x2(__p0) __extension__ ({ \ - uint32x4x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 50); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vdupq_laneq_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint64x2_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ + __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u64_x2(__p0) __extension__ ({ \ - uint64x2x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 51); \ +#define vdupq_laneq_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_u64_x2(__p0) __extension__ ({ \ - uint64x2x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 51); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vdupq_laneq_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u16_x2(__p0) __extension__ ({ \ - uint16x8x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 49); \ +#define vdupq_laneq_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_u16_x2(__p0) __extension__ ({ \ - uint16x8x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 49); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s8_x2(__p0) __extension__ ({ \ - int8x16x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 32); \ +#define vdupq_laneq_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vld1q_s8_x2(__p0) __extension__ ({ \ - int8x16x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 32); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float64x2_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f64_x2(__p0) __extension__ ({ \ - float64x2x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 42); \ +#define vdupq_laneq_f32(__p0, __p1) 
__extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_f64_x2(__p0) __extension__ ({ \ - float64x2x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 42); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vdupq_laneq_f32(__p0, __p1) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float32x4_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f32_x2(__p0) __extension__ ({ \ - float32x4x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 41); \ +#define vdupq_laneq_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_f32_x2(__p0) __extension__ ({ \ - float32x4x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 41); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vdupq_laneq_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f16_x2(__p0) __extension__ ({ \ - float16x8x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 40); \ +#define vdupq_laneq_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_f16_x2(__p0) __extension__ ({ \ - float16x8x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 40); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s32_x2(__p0) __extension__ ({ \ - int32x4x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 34); \ +#define vdupq_laneq_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vld1q_s32_x2(__p0) __extension__ ({ \ - int32x4x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 34); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vdupq_laneq_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + 
int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64x2_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s64_x2(__p0) __extension__ ({ \ - int64x2x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 35); \ +#define vdupq_laneq_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_s64_x2(__p0) __extension__ ({ \ - int64x2x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 35); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vdupq_laneq_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s16_x2(__p0) __extension__ ({ \ - int16x8x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 33); \ +#define vdup_laneq_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x8_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1q_s16_x2(__p0) __extension__ ({ \ - int16x8x2_t __ret; \ - __builtin_neon_vld1q_x2_v(&__ret, __p0, 33); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdup_laneq_u8(__p0, __p1) __extension__ ({ \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u8_x2(__p0) __extension__ ({ \ - uint8x8x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 16); \ +#define vdup_laneq_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vld1_u8_x2(__p0) __extension__ ({ \ - uint8x8x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 16); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdup_laneq_u32(__p0, __p1) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x2_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u32_x2(__p0) __extension__ ({ \ - uint32x2x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 18); \ +#define vdup_laneq_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ 
+ uint64x1_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1); \ __ret; \ }) #else -#define vld1_u32_x2(__p0) __extension__ ({ \ - uint32x2x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 18); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vdup_laneq_u64(__p0, __p1) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint64x1_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u64_x2(__p0) __extension__ ({ \ - uint64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 19); \ +#define vdup_laneq_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x4_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1_u64_x2(__p0) __extension__ ({ \ - uint64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 19); \ +#define vdup_laneq_u16(__p0, __p1) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u16_x2(__p0) __extension__ ({ \ - uint16x4x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 17); \ +#define vdup_laneq_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x8_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1_u16_x2(__p0) __extension__ ({ \ - uint16x4x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 17); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vdup_laneq_s8(__p0, __p1) __extension__ ({ \ + int8x16_t __s0 = __p0; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s8_x2(__p0) __extension__ ({ \ - int8x8x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 0); \ +#define vdup_laneq_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x1_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1); \ __ret; \ }) #else -#define vld1_s8_x2(__p0) __extension__ ({ \ - int8x8x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 0); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdup_laneq_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float64x1_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f64_x2(__p0) __extension__ ({ \ - float64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 10); \ +#define 
vdup_laneq_f32(__p0, __p1) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vld1_f64_x2(__p0) __extension__ ({ \ - float64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 10); \ +#define vdup_laneq_f32(__p0, __p1) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float32x2_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f32_x2(__p0) __extension__ ({ \ - float32x2x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 9); \ +#define vdup_laneq_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x4_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1_f32_x2(__p0) __extension__ ({ \ - float32x2x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 9); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vdup_laneq_f16(__p0, __p1) __extension__ ({ \ + float16x8_t __s0 = __p0; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f16_x2(__p0) __extension__ ({ \ - float16x4x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 8); \ +#define vdup_laneq_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x2_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \ __ret; \ }) #else -#define vld1_f16_x2(__p0) __extension__ ({ \ - float16x4x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 8); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vdup_laneq_s32(__p0, __p1) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x2_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s32_x2(__p0) __extension__ ({ \ - int32x2x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 2); \ +#define vdup_laneq_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x1_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1); \ __ret; \ }) #else -#define vld1_s32_x2(__p0) __extension__ ({ \ - int32x2x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 2); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vdup_laneq_s64(__p0, __p1) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int64x1_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s64_x2(__p0) __extension__ ({ \ - int64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 3); \ +#define 
vdup_laneq_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x4_t __ret; \ + __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \ __ret; \ }) #else -#define vld1_s64_x2(__p0) __extension__ ({ \ - int64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 3); \ +#define vdup_laneq_s16(__p0, __p1) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s16_x2(__p0) __extension__ ({ \ - int16x4x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 1); \ +__ai poly64x1_t vdup_n_p64(poly64_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t) {__p0}; + return __ret; +} +#else +__ai poly64x1_t vdup_n_p64(poly64_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t) {__p0}; + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai poly64x2_t vdupq_n_p64(poly64_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t) {__p0, __p0}; + return __ret; +} +#else +__ai poly64x2_t vdupq_n_p64(poly64_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t) {__p0, __p0}; + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float64x2_t vdupq_n_f64(float64_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) {__p0, __p0}; + return __ret; +} +#else +__ai float64x2_t vdupq_n_f64(float64_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) {__p0, __p0}; + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float64x1_t vdup_n_f64(float64_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) {__p0}; + return __ret; +} +#else +__ai float64x1_t vdup_n_f64(float64_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) {__p0}; + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vext_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) #else -#define vld1_s16_x2(__p0) __extension__ ({ \ - int16x4x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 1); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vext_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p8_x3(__p0) __extension__ ({ \ - poly8x8x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 4); \ +#define vextq_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ __ret; \ }) #else -#define vld1_p8_x3(__p0) __extension__ ({ \ - poly8x8x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 4); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = 
__builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vextq_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p64_x3(__p0) __extension__ ({ \ - poly64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 6); \ +#define vextq_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 42); \ __ret; \ }) #else -#define vld1_p64_x3(__p0) __extension__ ({ \ - poly64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 6); \ +#define vextq_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 42); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p16_x3(__p0) __extension__ ({ \ - poly16x4x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 5); \ +#define vext_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) #else -#define vld1_p16_x3(__p0) __extension__ ({ \ - poly16x4x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 5); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ +#define vext_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p8_x3(__p0) __extension__ ({ \ - poly8x16x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 36); \ +__ai float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#else +__ai float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float64x2_t __noswap_vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) 
__builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float64x1_t vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); + return __ret; +} +#else +__ai float64x1_t vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); + return __ret; +} +__ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ __ret; \ }) #else -#define vld1q_p8_x3(__p0) __extension__ ({ \ - poly8x16x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 36); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret; \ +}) +#define __noswap_vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p64_x3(__p0) __extension__ ({ \ - poly64x2x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 38); \ +#define vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__s2, __p3); \ __ret; \ }) #else -#define vld1q_p64_x3(__p0) __extension__ ({ \ - poly64x2x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 38); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ +#define vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret; \ +}) +#define __noswap_vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32_t __ret; \ + __ret = (float32_t) 
__builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p16_x3(__p0) __extension__ ({ \ - poly16x8x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 37); \ +#define vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 42); \ __ret; \ }) #else -#define vld1q_p16_x3(__p0) __extension__ ({ \ - poly16x8x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 37); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__s2, __p3, 42); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 42); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u8_x3(__p0) __extension__ ({ \ - uint8x16x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 48); \ +#define vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32x4_t __ret; \ + __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 41); \ __ret; \ }) #else -#define vld1q_u8_x3(__p0) __extension__ ({ \ - uint8x16x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 48); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + float32x4_t __ret; \ + __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, __p3, 41); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32x4_t __ret; \ + 
__ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 41); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u32_x3(__p0) __extension__ ({ \ - uint32x4x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 50); \ +#define vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ __ret; \ }) #else -#define vld1q_u32_x3(__p0) __extension__ ({ \ - uint32x4x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 50); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ +#define vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ + __ret; \ +}) +#define __noswap_vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __s2 = __p2; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u64_x3(__p0) __extension__ ({ \ - uint64x2x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 51); \ +#define vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x2_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 9); \ __ret; \ }) #else -#define vld1q_u64_x3(__p0) __extension__ ({ \ - uint64x2x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 51); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ +#define vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x2_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, __p3, 9); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x2_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x2_t __s2 = __p2; \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 9); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u16_x3(__p0) __extension__ ({ \ - uint16x8x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 49); \ +#define vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64_t __s1 = 
__p1; \ + float64x2_t __s2 = __p2; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (int8x16_t)__s2, __p3); \ __ret; \ }) #else -#define vld1q_u16_x3(__p0) __extension__ ({ \ - uint16x8x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 49); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64_t __s1 = __p1; \ + float64x2_t __s2 = __p2; \ + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret; \ +}) +#define __noswap_vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64_t __s1 = __p1; \ + float64x2_t __s2 = __p2; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (int8x16_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s8_x3(__p0) __extension__ ({ \ - int8x16x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 32); \ +#define vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (int8x16_t)__s2, __p3); \ __ret; \ }) #else -#define vld1q_s8_x3(__p0) __extension__ ({ \ - int8x16x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 32); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret; \ +}) +#define __noswap_vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32_t __s0 = __p0; \ + float32_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32_t __ret; \ + __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (int8x16_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f64_x3(__p0) __extension__ ({ \ - float64x2x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 42); \ +#define vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __s2 = __p2; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 42); \ __ret; \ }) #else -#define vld1q_f64_x3(__p0) __extension__ ({ \ - float64x2x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 42); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - 
__ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ +#define vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __s2 = __p2; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 42); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __s2 = __p2; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 42); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f32_x3(__p0) __extension__ ({ \ - float32x4x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 41); \ +#define vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32x4_t __ret; \ + __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 41); \ __ret; \ }) #else -#define vld1q_f32_x3(__p0) __extension__ ({ \ - float32x4x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 41); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ +#define vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + float32x4_t __ret; \ + __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 41); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#define __noswap_vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x4_t __s0 = __p0; \ + float32x4_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32x4_t __ret; \ + __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 41); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f16_x3(__p0) __extension__ ({ \ - float16x8x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 40); \ +#define vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x2_t __s2 = __p2; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 10); \ __ret; \ }) #else -#define vld1q_f16_x3(__p0) __extension__ ({ \ - float16x8x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 40); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); 
\ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x2_t __s2 = __p2; \ + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__rev2, __p3, 10); \ + __ret; \ +}) +#define __noswap_vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x2_t __s2 = __p2; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s32_x3(__p0) __extension__ ({ \ - int32x4x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 34); \ +#define vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x2_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 9); \ __ret; \ }) #else -#define vld1q_s32_x3(__p0) __extension__ ({ \ - int32x4x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 34); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ +#define vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x2_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x16_t)__rev2, __p3, 9); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ + float32x2_t __s0 = __p0; \ + float32x2_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ + float32x2_t __ret; \ + __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 9); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s64_x3(__p0) __extension__ ({ \ - int64x2x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 35); \ - __ret; \ -}) +__ai float64x2_t vfmaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { + float64x2_t __ret; + __ret = vfmaq_f64(__p0, __p1, (float64x2_t) {__p2, __p2}); + return __ret; +} #else -#define vld1q_s64_x3(__p0) __extension__ ({ \ - int64x2x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 35); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret; \ -}) +__ai float64x2_t vfmaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __ret; + __ret = 
__noswap_vfmaq_f64(__rev0, __rev1, (float64x2_t) {__p2, __p2}); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s16_x3(__p0) __extension__ ({ \ - int16x8x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 33); \ - __ret; \ -}) +__ai float64x1_t vfma_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { + float64x1_t __ret; + __ret = vfma_f64(__p0, __p1, (float64x1_t) {__p2}); + return __ret; +} #else -#define vld1q_s16_x3(__p0) __extension__ ({ \ - int16x8x3_t __ret; \ - __builtin_neon_vld1q_x3_v(&__ret, __p0, 33); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float64x1_t vfma_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { + float64x1_t __ret; + __ret = __noswap_vfma_f64(__p0, __p1, (float64x1_t) {__p2}); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u8_x3(__p0) __extension__ ({ \ - uint8x8x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 16); \ - __ret; \ -}) +__ai float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = vfmaq_f64(__p0, -__p1, __p2); + return __ret; +} #else -#define vld1_u8_x3(__p0) __extension__ ({ \ - uint8x8x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 16); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __ret; + __ret = __noswap_vfmaq_f64(__rev0, -__rev1, __rev2); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u32_x3(__p0) __extension__ ({ \ - uint32x2x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 18); \ - __ret; \ -}) +__ai float64x1_t vfms_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = vfma_f64(__p0, -__p1, __p2); + return __ret; +} #else -#define vld1_u32_x3(__p0) __extension__ ({ \ - uint32x2x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 18); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret; \ -}) +__ai float64x1_t vfms_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = __noswap_vfma_f64(__p0, -__p1, __p2); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u64_x3(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 19); \ - __ret; \ +#define vfmsd_lane_f64(__p0_100, __p1_100, __p2_100, __p3_100) __extension__ ({ \ + float64_t __s0_100 = __p0_100; \ + float64_t __s1_100 = __p1_100; \ + float64x1_t __s2_100 = __p2_100; \ + float64_t 
__ret_100; \ + __ret_100 = vfmad_lane_f64(__s0_100, -__s1_100, __s2_100, __p3_100); \ + __ret_100; \ }) #else -#define vld1_u64_x3(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 19); \ - __ret; \ +#define vfmsd_lane_f64(__p0_101, __p1_101, __p2_101, __p3_101) __extension__ ({ \ + float64_t __s0_101 = __p0_101; \ + float64_t __s1_101 = __p1_101; \ + float64x1_t __s2_101 = __p2_101; \ + float64_t __ret_101; \ + __ret_101 = __noswap_vfmad_lane_f64(__s0_101, -__s1_101, __s2_101, __p3_101); \ + __ret_101; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u16_x3(__p0) __extension__ ({ \ - uint16x4x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 17); \ - __ret; \ +#define vfmss_lane_f32(__p0_102, __p1_102, __p2_102, __p3_102) __extension__ ({ \ + float32_t __s0_102 = __p0_102; \ + float32_t __s1_102 = __p1_102; \ + float32x2_t __s2_102 = __p2_102; \ + float32_t __ret_102; \ + __ret_102 = vfmas_lane_f32(__s0_102, -__s1_102, __s2_102, __p3_102); \ + __ret_102; \ }) #else -#define vld1_u16_x3(__p0) __extension__ ({ \ - uint16x4x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 17); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret; \ +#define vfmss_lane_f32(__p0_103, __p1_103, __p2_103, __p3_103) __extension__ ({ \ + float32_t __s0_103 = __p0_103; \ + float32_t __s1_103 = __p1_103; \ + float32x2_t __s2_103 = __p2_103; \ + float32x2_t __rev2_103; __rev2_103 = __builtin_shufflevector(__s2_103, __s2_103, 1, 0); \ + float32_t __ret_103; \ + __ret_103 = __noswap_vfmas_lane_f32(__s0_103, -__s1_103, __rev2_103, __p3_103); \ + __ret_103; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s8_x3(__p0) __extension__ ({ \ - int8x8x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 0); \ - __ret; \ +#define vfmsq_lane_f64(__p0_104, __p1_104, __p2_104, __p3_104) __extension__ ({ \ + float64x2_t __s0_104 = __p0_104; \ + float64x2_t __s1_104 = __p1_104; \ + float64x1_t __s2_104 = __p2_104; \ + float64x2_t __ret_104; \ + __ret_104 = vfmaq_lane_f64(__s0_104, -__s1_104, __s2_104, __p3_104); \ + __ret_104; \ }) #else -#define vld1_s8_x3(__p0) __extension__ ({ \ - int8x8x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 0); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vfmsq_lane_f64(__p0_105, __p1_105, __p2_105, __p3_105) __extension__ ({ \ + float64x2_t __s0_105 = __p0_105; \ + float64x2_t __s1_105 = __p1_105; \ + float64x1_t __s2_105 = __p2_105; \ + float64x2_t __rev0_105; __rev0_105 = __builtin_shufflevector(__s0_105, __s0_105, 1, 0); \ + float64x2_t __rev1_105; __rev1_105 = __builtin_shufflevector(__s1_105, __s1_105, 1, 0); \ + float64x2_t __ret_105; \ + __ret_105 = __noswap_vfmaq_lane_f64(__rev0_105, -__rev1_105, __s2_105, __p3_105); \ + __ret_105 = __builtin_shufflevector(__ret_105, __ret_105, 1, 0); \ + __ret_105; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f64_x3(__p0) __extension__ ({ \ - float64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 10); \ - __ret; \ +#define vfmsq_lane_f32(__p0_106, __p1_106, __p2_106, __p3_106) __extension__ ({ \ + float32x4_t 
__s0_106 = __p0_106; \ + float32x4_t __s1_106 = __p1_106; \ + float32x2_t __s2_106 = __p2_106; \ + float32x4_t __ret_106; \ + __ret_106 = vfmaq_lane_f32(__s0_106, -__s1_106, __s2_106, __p3_106); \ + __ret_106; \ }) #else -#define vld1_f64_x3(__p0) __extension__ ({ \ - float64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 10); \ - __ret; \ +#define vfmsq_lane_f32(__p0_107, __p1_107, __p2_107, __p3_107) __extension__ ({ \ + float32x4_t __s0_107 = __p0_107; \ + float32x4_t __s1_107 = __p1_107; \ + float32x2_t __s2_107 = __p2_107; \ + float32x4_t __rev0_107; __rev0_107 = __builtin_shufflevector(__s0_107, __s0_107, 3, 2, 1, 0); \ + float32x4_t __rev1_107; __rev1_107 = __builtin_shufflevector(__s1_107, __s1_107, 3, 2, 1, 0); \ + float32x2_t __rev2_107; __rev2_107 = __builtin_shufflevector(__s2_107, __s2_107, 1, 0); \ + float32x4_t __ret_107; \ + __ret_107 = __noswap_vfmaq_lane_f32(__rev0_107, -__rev1_107, __rev2_107, __p3_107); \ + __ret_107 = __builtin_shufflevector(__ret_107, __ret_107, 3, 2, 1, 0); \ + __ret_107; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f32_x3(__p0) __extension__ ({ \ - float32x2x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 9); \ - __ret; \ +#define vfms_lane_f64(__p0_108, __p1_108, __p2_108, __p3_108) __extension__ ({ \ + float64x1_t __s0_108 = __p0_108; \ + float64x1_t __s1_108 = __p1_108; \ + float64x1_t __s2_108 = __p2_108; \ + float64x1_t __ret_108; \ + __ret_108 = vfma_lane_f64(__s0_108, -__s1_108, __s2_108, __p3_108); \ + __ret_108; \ }) #else -#define vld1_f32_x3(__p0) __extension__ ({ \ - float32x2x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 9); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret; \ +#define vfms_lane_f64(__p0_109, __p1_109, __p2_109, __p3_109) __extension__ ({ \ + float64x1_t __s0_109 = __p0_109; \ + float64x1_t __s1_109 = __p1_109; \ + float64x1_t __s2_109 = __p2_109; \ + float64x1_t __ret_109; \ + __ret_109 = __noswap_vfma_lane_f64(__s0_109, -__s1_109, __s2_109, __p3_109); \ + __ret_109; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f16_x3(__p0) __extension__ ({ \ - float16x4x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 8); \ - __ret; \ +#define vfms_lane_f32(__p0_110, __p1_110, __p2_110, __p3_110) __extension__ ({ \ + float32x2_t __s0_110 = __p0_110; \ + float32x2_t __s1_110 = __p1_110; \ + float32x2_t __s2_110 = __p2_110; \ + float32x2_t __ret_110; \ + __ret_110 = vfma_lane_f32(__s0_110, -__s1_110, __s2_110, __p3_110); \ + __ret_110; \ }) #else -#define vld1_f16_x3(__p0) __extension__ ({ \ - float16x4x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 8); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret; \ +#define vfms_lane_f32(__p0_111, __p1_111, __p2_111, __p3_111) __extension__ ({ \ + float32x2_t __s0_111 = __p0_111; \ + float32x2_t __s1_111 = __p1_111; \ + float32x2_t __s2_111 = __p2_111; \ + float32x2_t __rev0_111; __rev0_111 = __builtin_shufflevector(__s0_111, __s0_111, 1, 0); \ + float32x2_t __rev1_111; __rev1_111 = __builtin_shufflevector(__s1_111, __s1_111, 1, 0); \ + float32x2_t __rev2_111; __rev2_111 = __builtin_shufflevector(__s2_111, __s2_111, 1, 0); \ + float32x2_t 
__ret_111; \ + __ret_111 = __noswap_vfma_lane_f32(__rev0_111, -__rev1_111, __rev2_111, __p3_111); \ + __ret_111 = __builtin_shufflevector(__ret_111, __ret_111, 1, 0); \ + __ret_111; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s32_x3(__p0) __extension__ ({ \ - int32x2x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 2); \ - __ret; \ +#define vfmsd_laneq_f64(__p0_112, __p1_112, __p2_112, __p3_112) __extension__ ({ \ + float64_t __s0_112 = __p0_112; \ + float64_t __s1_112 = __p1_112; \ + float64x2_t __s2_112 = __p2_112; \ + float64_t __ret_112; \ + __ret_112 = vfmad_laneq_f64(__s0_112, -__s1_112, __s2_112, __p3_112); \ + __ret_112; \ }) #else -#define vld1_s32_x3(__p0) __extension__ ({ \ - int32x2x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 2); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret; \ +#define vfmsd_laneq_f64(__p0_113, __p1_113, __p2_113, __p3_113) __extension__ ({ \ + float64_t __s0_113 = __p0_113; \ + float64_t __s1_113 = __p1_113; \ + float64x2_t __s2_113 = __p2_113; \ + float64x2_t __rev2_113; __rev2_113 = __builtin_shufflevector(__s2_113, __s2_113, 1, 0); \ + float64_t __ret_113; \ + __ret_113 = __noswap_vfmad_laneq_f64(__s0_113, -__s1_113, __rev2_113, __p3_113); \ + __ret_113; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s64_x3(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 3); \ - __ret; \ +#define vfmss_laneq_f32(__p0_114, __p1_114, __p2_114, __p3_114) __extension__ ({ \ + float32_t __s0_114 = __p0_114; \ + float32_t __s1_114 = __p1_114; \ + float32x4_t __s2_114 = __p2_114; \ + float32_t __ret_114; \ + __ret_114 = vfmas_laneq_f32(__s0_114, -__s1_114, __s2_114, __p3_114); \ + __ret_114; \ }) #else -#define vld1_s64_x3(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 3); \ - __ret; \ +#define vfmss_laneq_f32(__p0_115, __p1_115, __p2_115, __p3_115) __extension__ ({ \ + float32_t __s0_115 = __p0_115; \ + float32_t __s1_115 = __p1_115; \ + float32x4_t __s2_115 = __p2_115; \ + float32x4_t __rev2_115; __rev2_115 = __builtin_shufflevector(__s2_115, __s2_115, 3, 2, 1, 0); \ + float32_t __ret_115; \ + __ret_115 = __noswap_vfmas_laneq_f32(__s0_115, -__s1_115, __rev2_115, __p3_115); \ + __ret_115; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s16_x3(__p0) __extension__ ({ \ - int16x4x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 1); \ - __ret; \ +#define vfmsq_laneq_f64(__p0_116, __p1_116, __p2_116, __p3_116) __extension__ ({ \ + float64x2_t __s0_116 = __p0_116; \ + float64x2_t __s1_116 = __p1_116; \ + float64x2_t __s2_116 = __p2_116; \ + float64x2_t __ret_116; \ + __ret_116 = vfmaq_laneq_f64(__s0_116, -__s1_116, __s2_116, __p3_116); \ + __ret_116; \ }) #else -#define vld1_s16_x3(__p0) __extension__ ({ \ - int16x4x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 1); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret; \ +#define vfmsq_laneq_f64(__p0_117, __p1_117, __p2_117, __p3_117) __extension__ ({ \ + float64x2_t __s0_117 = __p0_117; \ + float64x2_t __s1_117 = __p1_117; \ + float64x2_t __s2_117 = __p2_117; \ + float64x2_t __rev0_117; __rev0_117 = 
__builtin_shufflevector(__s0_117, __s0_117, 1, 0); \ + float64x2_t __rev1_117; __rev1_117 = __builtin_shufflevector(__s1_117, __s1_117, 1, 0); \ + float64x2_t __rev2_117; __rev2_117 = __builtin_shufflevector(__s2_117, __s2_117, 1, 0); \ + float64x2_t __ret_117; \ + __ret_117 = __noswap_vfmaq_laneq_f64(__rev0_117, -__rev1_117, __rev2_117, __p3_117); \ + __ret_117 = __builtin_shufflevector(__ret_117, __ret_117, 1, 0); \ + __ret_117; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p8_x4(__p0) __extension__ ({ \ - poly8x8x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 4); \ - __ret; \ +#define vfmsq_laneq_f32(__p0_118, __p1_118, __p2_118, __p3_118) __extension__ ({ \ + float32x4_t __s0_118 = __p0_118; \ + float32x4_t __s1_118 = __p1_118; \ + float32x4_t __s2_118 = __p2_118; \ + float32x4_t __ret_118; \ + __ret_118 = vfmaq_laneq_f32(__s0_118, -__s1_118, __s2_118, __p3_118); \ + __ret_118; \ }) #else -#define vld1_p8_x4(__p0) __extension__ ({ \ - poly8x8x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 4); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ +#define vfmsq_laneq_f32(__p0_119, __p1_119, __p2_119, __p3_119) __extension__ ({ \ + float32x4_t __s0_119 = __p0_119; \ + float32x4_t __s1_119 = __p1_119; \ + float32x4_t __s2_119 = __p2_119; \ + float32x4_t __rev0_119; __rev0_119 = __builtin_shufflevector(__s0_119, __s0_119, 3, 2, 1, 0); \ + float32x4_t __rev1_119; __rev1_119 = __builtin_shufflevector(__s1_119, __s1_119, 3, 2, 1, 0); \ + float32x4_t __rev2_119; __rev2_119 = __builtin_shufflevector(__s2_119, __s2_119, 3, 2, 1, 0); \ + float32x4_t __ret_119; \ + __ret_119 = __noswap_vfmaq_laneq_f32(__rev0_119, -__rev1_119, __rev2_119, __p3_119); \ + __ret_119 = __builtin_shufflevector(__ret_119, __ret_119, 3, 2, 1, 0); \ + __ret_119; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p64_x4(__p0) __extension__ ({ \ - poly64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 6); \ - __ret; \ +#define vfms_laneq_f64(__p0_120, __p1_120, __p2_120, __p3_120) __extension__ ({ \ + float64x1_t __s0_120 = __p0_120; \ + float64x1_t __s1_120 = __p1_120; \ + float64x2_t __s2_120 = __p2_120; \ + float64x1_t __ret_120; \ + __ret_120 = vfma_laneq_f64(__s0_120, -__s1_120, __s2_120, __p3_120); \ + __ret_120; \ }) #else -#define vld1_p64_x4(__p0) __extension__ ({ \ - poly64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 6); \ - __ret; \ +#define vfms_laneq_f64(__p0_121, __p1_121, __p2_121, __p3_121) __extension__ ({ \ + float64x1_t __s0_121 = __p0_121; \ + float64x1_t __s1_121 = __p1_121; \ + float64x2_t __s2_121 = __p2_121; \ + float64x2_t __rev2_121; __rev2_121 = __builtin_shufflevector(__s2_121, __s2_121, 1, 0); \ + float64x1_t __ret_121; \ + __ret_121 = __noswap_vfma_laneq_f64(__s0_121, -__s1_121, __rev2_121, __p3_121); \ + __ret_121; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_p16_x4(__p0) __extension__ ({ \ - poly16x4x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 5); \ - __ret; \ +#define vfms_laneq_f32(__p0_122, __p1_122, __p2_122, __p3_122) __extension__ ({ \ + float32x2_t __s0_122 = __p0_122; \ + float32x2_t __s1_122 = __p1_122; \ + float32x4_t __s2_122 = __p2_122; \ + float32x2_t __ret_122; \ + __ret_122 = 
vfma_laneq_f32(__s0_122, -__s1_122, __s2_122, __p3_122); \ + __ret_122; \ }) #else -#define vld1_p16_x4(__p0) __extension__ ({ \ - poly16x4x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 5); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ - __ret; \ +#define vfms_laneq_f32(__p0_123, __p1_123, __p2_123, __p3_123) __extension__ ({ \ + float32x2_t __s0_123 = __p0_123; \ + float32x2_t __s1_123 = __p1_123; \ + float32x4_t __s2_123 = __p2_123; \ + float32x2_t __rev0_123; __rev0_123 = __builtin_shufflevector(__s0_123, __s0_123, 1, 0); \ + float32x2_t __rev1_123; __rev1_123 = __builtin_shufflevector(__s1_123, __s1_123, 1, 0); \ + float32x4_t __rev2_123; __rev2_123 = __builtin_shufflevector(__s2_123, __s2_123, 3, 2, 1, 0); \ + float32x2_t __ret_123; \ + __ret_123 = __noswap_vfma_laneq_f32(__rev0_123, -__rev1_123, __rev2_123, __p3_123); \ + __ret_123 = __builtin_shufflevector(__ret_123, __ret_123, 1, 0); \ + __ret_123; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p8_x4(__p0) __extension__ ({ \ - poly8x16x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 36); \ - __ret; \ -}) +__ai float64x2_t vfmsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { + float64x2_t __ret; + __ret = vfmaq_f64(__p0, -__p1, (float64x2_t) {__p2, __p2}); + return __ret; +} #else -#define vld1q_p8_x4(__p0) __extension__ ({ \ - poly8x16x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 36); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float64x2_t vfmsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __ret; + __ret = __noswap_vfmaq_f64(__rev0, -__rev1, (float64x2_t) {__p2, __p2}); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p64_x4(__p0) __extension__ ({ \ - poly64x2x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 38); \ - __ret; \ -}) +__ai float32x4_t vfmsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { + float32x4_t __ret; + __ret = vfmaq_f32(__p0, -__p1, (float32x4_t) {__p2, __p2, __p2, __p2}); + return __ret; +} #else -#define vld1q_p64_x4(__p0) __extension__ ({ \ - poly64x2x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 38); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ - __ret; \ -}) +__ai float32x4_t vfmsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { + float32x4_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __ret; + __ret = __noswap_vfmaq_f32(__rev0, -__rev1, (float32x4_t) {__p2, __p2, __p2, __p2}); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_p16_x4(__p0) __extension__ ({ \ - poly16x8x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 37); \ - __ret; \ -}) +__ai float64x1_t vfms_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { + float64x1_t __ret; + __ret = vfma_f64(__p0, -__p1, (float64x1_t) {__p2}); + return __ret; +} #else -#define vld1q_p16_x4(__p0) __extension__ ({ \ - poly16x8x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 37); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float64x1_t vfms_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { + float64x1_t __ret; + __ret = __noswap_vfma_f64(__p0, -__p1, (float64x1_t) {__p2}); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u8_x4(__p0) __extension__ ({ \ - uint8x16x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 48); \ - __ret; \ -}) +__ai float32x2_t vfms_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { + float32x2_t __ret; + __ret = vfma_f32(__p0, -__p1, (float32x2_t) {__p2, __p2}); + return __ret; +} #else -#define vld1q_u8_x4(__p0) __extension__ ({ \ - uint8x16x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 48); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float32x2_t vfms_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __ret; + __ret = __noswap_vfma_f32(__rev0, -__rev1, (float32x2_t) {__p2, __p2}); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u32_x4(__p0) __extension__ ({ \ - uint32x4x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 50); \ - __ret; \ -}) +__ai poly64x1_t vget_high_p64(poly64x2_t __p0) { + poly64x1_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 1); + return __ret; +} #else -#define vld1q_u32_x4(__p0) __extension__ ({ \ - uint32x4x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 50); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 
2, 1, 0); \ - __ret; \ -}) +__ai poly64x1_t vget_high_p64(poly64x2_t __p0) { + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + poly64x1_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev0, 1); + return __ret; +} +__ai poly64x1_t __noswap_vget_high_p64(poly64x2_t __p0) { + poly64x1_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u64_x4(__p0) __extension__ ({ \ - uint64x2x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 51); \ - __ret; \ -}) +__ai float64x1_t vget_high_f64(float64x2_t __p0) { + float64x1_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 1); + return __ret; +} #else -#define vld1q_u64_x4(__p0) __extension__ ({ \ - uint64x2x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 51); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ - __ret; \ -}) +__ai float64x1_t vget_high_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x1_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev0, 1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_u16_x4(__p0) __extension__ ({ \ - uint16x8x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 49); \ +#define vget_lane_p64(__p0, __p1) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64_t __ret; \ + __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_u16_x4(__p0) __extension__ ({ \ - uint16x8x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 49); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld1q_s8_x4(__p0) __extension__ ({ \ - int8x16x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 32); \ +#define vget_lane_p64(__p0, __p1) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64_t __ret; \ + __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ __ret; \ }) -#else -#define vld1q_s8_x4(__p0) __extension__ ({ \ - int8x16x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 32); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define __noswap_vget_lane_p64(__p0, __p1) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64_t __ret; \ + __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f64_x4(__p0) __extension__ ({ \ - float64x2x4_t __ret; \ - 
__builtin_neon_vld1q_x4_v(&__ret, __p0, 42); \ +#define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64_t __ret; \ + __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_f64_x4(__p0) __extension__ ({ \ - float64x2x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 42); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld1q_f32_x4(__p0) __extension__ ({ \ - float32x4x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 41); \ +#define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + poly64_t __ret; \ + __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__rev0, __p1); \ __ret; \ }) -#else -#define vld1q_f32_x4(__p0) __extension__ ({ \ - float32x4x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 41); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ +#define __noswap_vgetq_lane_p64(__p0, __p1) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64_t __ret; \ + __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_f16_x4(__p0) __extension__ ({ \ - float16x8x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 40); \ +#define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_f16_x4(__p0) __extension__ ({ \ - float16x8x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 40); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__rev0, __p1); \ + __ret; \ +}) +#define __noswap_vgetq_lane_f64(__p0, __p1) __extension__ ({ \ + float64x2_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s32_x4(__p0) __extension__ ({ \ - int32x4x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 34); \ +#define vget_lane_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ __ret; \ }) #else -#define vld1q_s32_x4(__p0) __extension__ ({ \ - int32x4x4_t __ret; \ 
- __builtin_neon_vld1q_x4_v(&__ret, __p0, 34); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ +#define vget_lane_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ + __ret; \ +}) +#define __noswap_vget_lane_f64(__p0, __p1) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64_t __ret; \ + __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s64_x4(__p0) __extension__ ({ \ - int64x2x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 35); \ - __ret; \ -}) +__ai poly64x1_t vget_low_p64(poly64x2_t __p0) { + poly64x1_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 0); + return __ret; +} #else -#define vld1q_s64_x4(__p0) __extension__ ({ \ - int64x2x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 35); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ - __ret; \ -}) +__ai poly64x1_t vget_low_p64(poly64x2_t __p0) { + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + poly64x1_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev0, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vld1q_s16_x4(__p0) __extension__ ({ \ - int16x8x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 33); \ +__ai float64x1_t vget_low_f64(float64x2_t __p0) { + float64x1_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 0); + return __ret; +} +#else +__ai float64x1_t vget_low_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x1_t __ret; + __ret = __builtin_shufflevector(__rev0, __rev0, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1_p64(__p0) __extension__ ({ \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vld1_v(__p0, 6); \ __ret; \ }) #else -#define vld1q_s16_x4(__p0) __extension__ ({ \ - int16x8x4_t __ret; \ - __builtin_neon_vld1q_x4_v(&__ret, __p0, 33); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vld1_p64(__p0) __extension__ ({ \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vld1_v(__p0, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u8_x4(__p0) __extension__ ({ \ - uint8x8x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 16); \ +#define vld1q_p64(__p0) __extension__ ({ \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vld1q_v(__p0, 38); \ __ret; \ }) #else -#define vld1_u8_x4(__p0) __extension__ ({ \ - uint8x8x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 16); \ - \ - __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vld1q_p64(__p0) __extension__ ({ \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vld1q_v(__p0, 38); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u32_x4(__p0) __extension__ ({ \ - uint32x2x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 18); \ +#define vld1q_f64(__p0) __extension__ ({ \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vld1q_v(__p0, 42); \ __ret; \ }) #else -#define vld1_u32_x4(__p0) __extension__ ({ \ - uint32x2x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 18); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ +#define vld1q_f64(__p0) __extension__ ({ \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vld1q_v(__p0, 42); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u64_x4(__p0) __extension__ ({ \ - uint64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 19); \ +#define vld1_f64(__p0) __extension__ ({ \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vld1_v(__p0, 10); \ __ret; \ }) #else -#define vld1_u64_x4(__p0) __extension__ ({ \ - uint64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 19); \ +#define vld1_f64(__p0) __extension__ ({ \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vld1_v(__p0, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_u16_x4(__p0) __extension__ ({ \ - uint16x4x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 17); \ +#define vld1_dup_p64(__p0) __extension__ ({ \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vld1_dup_v(__p0, 6); \ __ret; \ }) #else -#define vld1_u16_x4(__p0) __extension__ ({ \ - uint16x4x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 17); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ +#define vld1_dup_p64(__p0) __extension__ ({ \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vld1_dup_v(__p0, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s8_x4(__p0) __extension__ ({ \ - int8x8x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 0); \ +#define vld1q_dup_p64(__p0) __extension__ ({ \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vld1q_dup_v(__p0, 38); \ __ret; \ }) #else -#define vld1_s8_x4(__p0) __extension__ ({ \ - int8x8x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 0); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] 
= __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vld1q_dup_p64(__p0) __extension__ ({ \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vld1q_dup_v(__p0, 38); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f64_x4(__p0) __extension__ ({ \ - float64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 10); \ +#define vld1q_dup_f64(__p0) __extension__ ({ \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vld1q_dup_v(__p0, 42); \ __ret; \ }) #else -#define vld1_f64_x4(__p0) __extension__ ({ \ - float64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 10); \ +#define vld1q_dup_f64(__p0) __extension__ ({ \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vld1q_dup_v(__p0, 42); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f32_x4(__p0) __extension__ ({ \ - float32x2x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 9); \ +#define vld1_dup_f64(__p0) __extension__ ({ \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vld1_dup_v(__p0, 10); \ __ret; \ }) #else -#define vld1_f32_x4(__p0) __extension__ ({ \ - float32x2x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 9); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ +#define vld1_dup_f64(__p0) __extension__ ({ \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vld1_dup_v(__p0, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_f16_x4(__p0) __extension__ ({ \ - float16x4x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 8); \ +#define vld1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) #else -#define vld1_f16_x4(__p0) __extension__ ({ \ - float16x4x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 8); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ +#define vld1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s32_x4(__p0) __extension__ ({ \ - int32x2x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 2); \ +#define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 38); \ __ret; \ }) #else -#define vld1_s32_x4(__p0) __extension__ ({ \ - int32x2x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 2); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - 
__ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ +#define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 38); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s64_x4(__p0) __extension__ ({ \ - int64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 3); \ +#define vld1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x2_t __s1 = __p1; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 42); \ __ret; \ }) #else -#define vld1_s64_x4(__p0) __extension__ ({ \ - int64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 3); \ +#define vld1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x2_t __s1 = __p1; \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 42); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld1_s16_x4(__p0) __extension__ ({ \ - int16x4x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 1); \ +#define vld1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) #else -#define vld1_s16_x4(__p0) __extension__ ({ \ - int16x4x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 1); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ +#define vld1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_p64(__p0) __extension__ ({ \ +#define vld1_p64_x2(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 6); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 6); \ __ret; \ }) #else -#define vld2_p64(__p0) __extension__ ({ \ +#define vld1_p64_x2(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 6); \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_p64(__p0) __extension__ ({ \ +#define vld1q_p64_x2(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 38); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 38); \ __ret; \ }) #else -#define vld2q_p64(__p0) __extension__ ({ \ +#define vld1q_p64_x2(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 38); \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -53372,15 +55294,15 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #endif #ifdef __LITTLE_ENDIAN__ 
-#define vld2q_u64(__p0) __extension__ ({ \ - uint64x2x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 51); \ +#define vld1q_f64_x2(__p0) __extension__ ({ \ + float64x2x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 42); \ __ret; \ }) #else -#define vld2q_u64(__p0) __extension__ ({ \ - uint64x2x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 51); \ +#define vld1q_f64_x2(__p0) __extension__ ({ \ + float64x2x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -53389,213 +55311,207 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_f64(__p0) __extension__ ({ \ - float64x2x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 42); \ +#define vld1_f64_x2(__p0) __extension__ ({ \ + float64x1x2_t __ret; \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 10); \ __ret; \ }) #else -#define vld2q_f64(__p0) __extension__ ({ \ - float64x2x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 42); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vld1_f64_x2(__p0) __extension__ ({ \ + float64x1x2_t __ret; \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_s64(__p0) __extension__ ({ \ - int64x2x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 35); \ +#define vld1_p64_x3(__p0) __extension__ ({ \ + poly64x1x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 6); \ __ret; \ }) #else -#define vld2q_s64(__p0) __extension__ ({ \ - int64x2x2_t __ret; \ - __builtin_neon_vld2q_v(&__ret, __p0, 35); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vld1_p64_x3(__p0) __extension__ ({ \ + poly64x1x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_f64(__p0) __extension__ ({ \ - float64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 10); \ +#define vld1q_p64_x3(__p0) __extension__ ({ \ + poly64x2x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 38); \ __ret; \ }) #else -#define vld2_f64(__p0) __extension__ ({ \ - float64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 10); \ +#define vld1q_p64_x3(__p0) __extension__ ({ \ + poly64x2x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 38); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2_dup_p64(__p0) __extension__ ({ \ - poly64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 6); \ +#define vld1q_f64_x3(__p0) __extension__ ({ \ + float64x2x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 42); \ __ret; \ }) #else -#define vld2_dup_p64(__p0) __extension__ ({ \ - poly64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 6); \ +#define vld1q_f64_x3(__p0) __extension__ ({ \ + float64x2x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 42); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + 
__ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_p8(__p0) __extension__ ({ \ - poly8x16x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 36); \ +#define vld1_f64_x3(__p0) __extension__ ({ \ + float64x1x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 10); \ __ret; \ }) #else -#define vld2q_dup_p8(__p0) __extension__ ({ \ - poly8x16x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 36); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vld1_f64_x3(__p0) __extension__ ({ \ + float64x1x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_p64(__p0) __extension__ ({ \ - poly64x2x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 38); \ +#define vld1_p64_x4(__p0) __extension__ ({ \ + poly64x1x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 6); \ __ret; \ }) #else -#define vld2q_dup_p64(__p0) __extension__ ({ \ - poly64x2x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 38); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vld1_p64_x4(__p0) __extension__ ({ \ + poly64x1x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_p16(__p0) __extension__ ({ \ - poly16x8x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 37); \ +#define vld1q_p64_x4(__p0) __extension__ ({ \ + poly64x2x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 38); \ __ret; \ }) #else -#define vld2q_dup_p16(__p0) __extension__ ({ \ - poly16x8x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 37); \ +#define vld1q_p64_x4(__p0) __extension__ ({ \ + poly64x2x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_u8(__p0) __extension__ ({ \ - uint8x16x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 48); \ +#define vld1q_f64_x4(__p0) __extension__ ({ \ + float64x2x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 42); \ __ret; \ }) #else -#define vld2q_dup_u8(__p0) __extension__ ({ \ - uint8x16x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 48); \ +#define vld1q_f64_x4(__p0) __extension__ ({ \ + float64x2x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_u32(__p0) __extension__ ({ \ - uint32x4x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 50); \ +#define vld1_f64_x4(__p0) __extension__ ({ \ + float64x1x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 10); \ __ret; \ }) #else -#define vld2q_dup_u32(__p0) __extension__ ({ \ - uint32x4x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 50); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vld1_f64_x4(__p0) __extension__ ({ \ + float64x1x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_u64(__p0) __extension__ ({ \ - uint64x2x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 51); \ +#define vld2_p64(__p0) __extension__ ({ \ + poly64x1x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 6); \ __ret; \ }) #else -#define vld2q_dup_u64(__p0) __extension__ ({ \ - uint64x2x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 51); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ +#define vld2_p64(__p0) __extension__ ({ \ + poly64x1x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_u16(__p0) __extension__ ({ \ - uint16x8x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 49); \ +#define vld2q_p64(__p0) __extension__ ({ \ + poly64x2x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 38); \ __ret; \ }) #else -#define vld2q_dup_u16(__p0) __extension__ ({ \ - uint16x8x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 49); \ +#define vld2q_p64(__p0) __extension__ ({ \ + poly64x2x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_s8(__p0) __extension__ ({ \ - int8x16x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 32); \ +#define vld2q_u64(__p0) __extension__ ({ \ + uint64x2x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 51); \ __ret; \ }) #else -#define vld2q_dup_s8(__p0) __extension__ ({ \ - int8x16x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 32); \ +#define vld2q_u64(__p0) __extension__ ({ \ + uint64x2x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_f64(__p0) __extension__ ({ \ +#define 
vld2q_f64(__p0) __extension__ ({ \ float64x2x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 42); \ + __builtin_neon_vld2q_v(&__ret, __p0, 42); \ __ret; \ }) #else -#define vld2q_dup_f64(__p0) __extension__ ({ \ +#define vld2q_f64(__p0) __extension__ ({ \ float64x2x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 42); \ + __builtin_neon_vld2q_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -53604,66 +55520,60 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_f32(__p0) __extension__ ({ \ - float32x4x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 41); \ +#define vld2q_s64(__p0) __extension__ ({ \ + int64x2x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 35); \ __ret; \ }) #else -#define vld2q_dup_f32(__p0) __extension__ ({ \ - float32x4x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 41); \ +#define vld2q_s64(__p0) __extension__ ({ \ + int64x2x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_f16(__p0) __extension__ ({ \ - float16x8x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 40); \ +#define vld2_f64(__p0) __extension__ ({ \ + float64x1x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 10); \ __ret; \ }) #else -#define vld2q_dup_f16(__p0) __extension__ ({ \ - float16x8x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 40); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vld2_f64(__p0) __extension__ ({ \ + float64x1x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_s32(__p0) __extension__ ({ \ - int32x4x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 34); \ +#define vld2_dup_p64(__p0) __extension__ ({ \ + poly64x1x2_t __ret; \ + __builtin_neon_vld2_dup_v(&__ret, __p0, 6); \ __ret; \ }) #else -#define vld2q_dup_s32(__p0) __extension__ ({ \ - int32x4x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 34); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ +#define vld2_dup_p64(__p0) __extension__ ({ \ + poly64x1x2_t __ret; \ + __builtin_neon_vld2_dup_v(&__ret, __p0, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_s64(__p0) __extension__ ({ \ - int64x2x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 35); \ +#define vld2q_dup_p64(__p0) __extension__ ({ \ + poly64x2x2_t __ret; \ + __builtin_neon_vld2q_dup_v(&__ret, __p0, 38); \ __ret; \ }) #else -#define vld2q_dup_s64(__p0) __extension__ ({ \ - int64x2x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 35); \ +#define vld2q_dup_p64(__p0) __extension__ ({ \ + poly64x2x2_t __ret; \ + __builtin_neon_vld2q_dup_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -53672,18 +55582,18 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #endif #ifdef __LITTLE_ENDIAN__ -#define vld2q_dup_s16(__p0) __extension__ ({ \ - int16x8x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 33); \ +#define vld2q_dup_f64(__p0) __extension__ ({ \ + float64x2x2_t __ret; \ + __builtin_neon_vld2q_dup_v(&__ret, __p0, 42); \ __ret; \ }) #else -#define vld2q_dup_s16(__p0) __extension__ ({ \ - int16x8x2_t __ret; \ - __builtin_neon_vld2q_dup_v(&__ret, __p0, 33); \ +#define vld2q_dup_f64(__p0) __extension__ ({ \ + float64x2x2_t __ret; \ + __builtin_neon_vld2q_dup_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif @@ -54034,24 +55944,6 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_p8(__p0) __extension__ ({ \ - poly8x16x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 36); \ - __ret; \ -}) -#else -#define vld3q_dup_p8(__p0) __extension__ ({ \ - poly8x16x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 36); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_p64(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ @@ -54070,114 +55962,6 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_p16(__p0) __extension__ ({ \ - poly16x8x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 37); \ - __ret; \ -}) -#else -#define vld3q_dup_p16(__p0) __extension__ ({ \ - poly16x8x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 37); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_u8(__p0) __extension__ ({ \ - uint8x16x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 48); \ - __ret; \ -}) -#else -#define vld3q_dup_u8(__p0) __extension__ ({ \ - uint8x16x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 48); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_u32(__p0) __extension__ ({ \ - uint32x4x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 50); \ - __ret; \ -}) -#else -#define vld3q_dup_u32(__p0) __extension__ ({ \ - uint32x4x3_t __ret; \ 
- __builtin_neon_vld3q_dup_v(&__ret, __p0, 50); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_u64(__p0) __extension__ ({ \ - uint64x2x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 51); \ - __ret; \ -}) -#else -#define vld3q_dup_u64(__p0) __extension__ ({ \ - uint64x2x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 51); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_u16(__p0) __extension__ ({ \ - uint16x8x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 49); \ - __ret; \ -}) -#else -#define vld3q_dup_u16(__p0) __extension__ ({ \ - uint16x8x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 49); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_s8(__p0) __extension__ ({ \ - int8x16x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 32); \ - __ret; \ -}) -#else -#define vld3q_dup_s8(__p0) __extension__ ({ \ - int8x16x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 32); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_f64(__p0) __extension__ ({ \ float64x2x3_t __ret; \ @@ -54196,96 +55980,6 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_f32(__p0) __extension__ ({ \ - float32x4x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 41); \ - __ret; \ -}) -#else -#define vld3q_dup_f32(__p0) __extension__ ({ \ - float32x4x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 41); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_f16(__p0) __extension__ ({ \ - float16x8x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 40); \ - __ret; \ -}) -#else -#define vld3q_dup_f16(__p0) __extension__ ({ \ - float16x8x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 40); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif 
- -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_s32(__p0) __extension__ ({ \ - int32x4x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 34); \ - __ret; \ -}) -#else -#define vld3q_dup_s32(__p0) __extension__ ({ \ - int32x4x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 34); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_s64(__p0) __extension__ ({ \ - int64x2x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 35); \ - __ret; \ -}) -#else -#define vld3q_dup_s64(__p0) __extension__ ({ \ - int64x2x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 35); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld3q_dup_s16(__p0) __extension__ ({ \ - int16x8x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 33); \ - __ret; \ -}) -#else -#define vld3q_dup_s16(__p0) __extension__ ({ \ - int16x8x3_t __ret; \ - __builtin_neon_vld3q_dup_v(&__ret, __p0, 33); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3_dup_f64(__p0) __extension__ ({ \ float64x1x3_t __ret; \ @@ -54650,25 +56344,6 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_p8(__p0) __extension__ ({ \ - poly8x16x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 36); \ - __ret; \ -}) -#else -#define vld4q_dup_p8(__p0) __extension__ ({ \ - poly8x16x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 36); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_p64(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ @@ -54688,120 +56363,6 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_p16(__p0) __extension__ ({ \ - poly16x8x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 37); \ - __ret; \ -}) -#else -#define vld4q_dup_p16(__p0) __extension__ ({ \ - poly16x8x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 37); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], 
__ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_u8(__p0) __extension__ ({ \ - uint8x16x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 48); \ - __ret; \ -}) -#else -#define vld4q_dup_u8(__p0) __extension__ ({ \ - uint8x16x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 48); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_u32(__p0) __extension__ ({ \ - uint32x4x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 50); \ - __ret; \ -}) -#else -#define vld4q_dup_u32(__p0) __extension__ ({ \ - uint32x4x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 50); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_u64(__p0) __extension__ ({ \ - uint64x2x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 51); \ - __ret; \ -}) -#else -#define vld4q_dup_u64(__p0) __extension__ ({ \ - uint64x2x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 51); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_u16(__p0) __extension__ ({ \ - uint16x8x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 49); \ - __ret; \ -}) -#else -#define vld4q_dup_u16(__p0) __extension__ ({ \ - uint16x8x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 49); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_s8(__p0) __extension__ ({ \ - int8x16x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 32); \ - __ret; \ -}) -#else -#define vld4q_dup_s8(__p0) __extension__ ({ \ - int8x16x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 32); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ 
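/* [editor's note -- annotation, not part of the generated header]
 * The removed hunks in this region all follow one mechanical pattern:
 * each vldNq_dup_* intrinsic is emitted twice, and the big-endian (#else)
 * variant lane-reverses every loaded register with __builtin_shufflevector
 * so the result matches the little-endian lane numbering that the
 * __builtin_neon_* calls assume. A minimal sketch of the idiom, with a
 * hypothetical 4-lane vector v introduced only for illustration:
 *
 *   int32x4_t rev = __builtin_shufflevector(v, v, 3, 2, 1, 0);
 *
 * The deletions here come from regenerating the header for clang-8; the
 * same definitions are presumably re-emitted at a different position in
 * the regenerated file, as the shifted new-file hunk offsets suggest.
 */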
- __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_f64(__p0) __extension__ ({ \ float64x2x4_t __ret; \ @@ -54821,101 +56382,6 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_f32(__p0) __extension__ ({ \ - float32x4x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 41); \ - __ret; \ -}) -#else -#define vld4q_dup_f32(__p0) __extension__ ({ \ - float32x4x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 41); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_f16(__p0) __extension__ ({ \ - float16x8x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 40); \ - __ret; \ -}) -#else -#define vld4q_dup_f16(__p0) __extension__ ({ \ - float16x8x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 40); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_s32(__p0) __extension__ ({ \ - int32x4x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 34); \ - __ret; \ -}) -#else -#define vld4q_dup_s32(__p0) __extension__ ({ \ - int32x4x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 34); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_s64(__p0) __extension__ ({ \ - int64x2x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 35); \ - __ret; \ -}) -#else -#define vld4q_dup_s64(__p0) __extension__ ({ \ - int64x2x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 35); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vld4q_dup_s16(__p0) __extension__ ({ \ - int16x8x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 33); \ - __ret; \ -}) -#else -#define vld4q_dup_s16(__p0) __extension__ ({ \ - int16x8x4_t __ret; \ - __builtin_neon_vld4q_dup_v(&__ret, __p0, 33); \ - \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 
2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4_dup_f64(__p0) __extension__ ({ \ float64x1x4_t __ret; \ @@ -55639,779 +57105,202 @@ __ai float64_t vminvq_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vminvq_f64((int8x16_t)__rev0); return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float32_t vminvq_f32(float32x4_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vminvq_f32((int8x16_t)__p0); - return __ret; -} -#else -__ai float32_t vminvq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32_t __ret; - __ret = (float32_t) __builtin_neon_vminvq_f32((int8x16_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32_t vminvq_s32(int32x4_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vminvq_s32((int8x16_t)__p0); - return __ret; -} -#else -__ai int32_t vminvq_s32(int32x4_t __p0) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32_t __ret; - __ret = (int32_t) __builtin_neon_vminvq_s32((int8x16_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16_t vminvq_s16(int16x8_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vminvq_s16((int8x16_t)__p0); - return __ret; -} -#else -__ai int16_t vminvq_s16(int16x8_t __p0) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16_t __ret; - __ret = (int16_t) __builtin_neon_vminvq_s16((int8x16_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint8_t vminv_u8(uint8x8_t __p0) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vminv_u8((int8x8_t)__p0); - return __ret; -} -#else -__ai uint8_t vminv_u8(uint8x8_t __p0) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vminv_u8((int8x8_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32_t vminv_u32(uint32x2_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vminv_u32((int8x8_t)__p0); - return __ret; -} -#else -__ai uint32_t vminv_u32(uint32x2_t __p0) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vminv_u32((int8x8_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint16_t vminv_u16(uint16x4_t __p0) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vminv_u16((int8x8_t)__p0); - return __ret; -} -#else -__ai uint16_t vminv_u16(uint16x4_t __p0) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vminv_u16((int8x8_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int8_t vminv_s8(int8x8_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vminv_s8((int8x8_t)__p0); - return __ret; -} -#else -__ai int8_t vminv_s8(int8x8_t __p0) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8_t __ret; - __ret = (int8_t) __builtin_neon_vminv_s8((int8x8_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float32_t vminv_f32(float32x2_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vminv_f32((int8x8_t)__p0); - return __ret; -} -#else -__ai float32_t vminv_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 
= __builtin_shufflevector(__p0, __p0, 1, 0); - float32_t __ret; - __ret = (float32_t) __builtin_neon_vminv_f32((int8x8_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32_t vminv_s32(int32x2_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vminv_s32((int8x8_t)__p0); - return __ret; -} -#else -__ai int32_t vminv_s32(int32x2_t __p0) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32_t __ret; - __ret = (int32_t) __builtin_neon_vminv_s32((int8x8_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16_t vminv_s16(int16x4_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vminv_s16((int8x8_t)__p0); - return __ret; -} -#else -__ai int16_t vminv_s16(int16x4_t __p0) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16_t __ret; - __ret = (int16_t) __builtin_neon_vminv_s16((int8x8_t)__rev0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = __p0 + __p1 * __p2; - return __ret; -} -#else -__ai float64x2_t vmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - float64x2_t __ret; - __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = __p0 + __p1 * __p2; - return __ret; -} -#else -__ai float64x1_t vmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = __p0 + __p1 * __p2; - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __s2 = __p2; \ - uint32x4_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ - __ret; \ -}) -#else -#define vmlaq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ - uint16x8_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ - __ret; \ -}) -#else -#define vmlaq_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2; __rev2 = 
__builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x4_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ - __ret; \ -}) -#else -#define vmlaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - float32x4_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ - __ret; \ -}) -#else -#define vmlaq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x8_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ - __ret; \ -}) -#else -#define vmlaq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x4_t __s2 = __p2; \ - uint32x2_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ - __ret; \ -}) -#else -#define vmla_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x4_t __s2 = 
__p2; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - uint32x2_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ - uint16x4_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ - __ret; \ -}) -#else -#define vmla_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x2_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ - __ret; \ -}) -#else -#define vmla_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x4_t __s2 = __p2; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - float32x2_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x2_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ - __ret; \ -}) -#else -#define vmla_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x2_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x4_t __ret; \ - __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ - __ret; \ -}) -#else -#define vmla_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x4_t __rev0; __rev0 
= __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmlaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __ret; - __ret = __p0 + __p1 * (float64x2_t) {__p2, __p2}; - return __ret; -} -#else -__ai float64x2_t vmlaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __rev0 + __rev1 * (float64x2_t) {__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x2_t __s2 = __p2; \ - uint64x2_t __ret; \ - __ret = __s0 + vmull_u32(vget_high_u32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) -#else -#define vmlal_high_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x2_t __s2 = __p2; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - uint64x2_t __ret; \ - __ret = __rev0 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x4_t __s2 = __p2; \ - uint32x4_t __ret; \ - __ret = __s0 + vmull_u16(vget_high_u16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) -#else -#define vmlal_high_lane_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x4_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = __rev0 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int64x2_t __ret; \ - __ret = __s0 + vmull_s32(vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) -#else -#define vmlal_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - int64x2_t __ret; \ - __ret = __rev0 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = __s0 + vmull_s16(vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) +__ai float32_t vminvq_f32(float32x4_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vminvq_f32((int8x16_t)__p0); + return __ret; +} #else -#define vmlal_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __rev0 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float32_t vminvq_f32(float32x4_t __p0) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32_t __ret; + __ret = (float32_t) __builtin_neon_vminvq_f32((int8x16_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __s2 = __p2; \ - uint64x2_t __ret; \ - __ret = __s0 + vmull_u32(vget_high_u32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) +__ai int32_t vminvq_s32(int32x4_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vminvq_s32((int8x16_t)__p0); + return __ret; +} #else -#define vmlal_high_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x4_t __s2 = __p2; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - uint64x2_t __ret; \ - __ret = __rev0 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai int32_t vminvq_s32(int32x4_t __p0) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32_t __ret; + __ret = (int32_t) __builtin_neon_vminvq_s32((int8x16_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ - uint32x4_t __ret; \ - __ret = __s0 + vmull_u16(vget_high_u16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) +__ai int16_t vminvq_s16(int16x8_t __p0) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vminvq_s16((int8x16_t)__p0); + return __ret; +} #else -#define vmlal_high_laneq_u16(__p0, __p1, 
__p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = __rev0 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int16_t vminvq_s16(int16x8_t __p0) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16_t __ret; + __ret = (int16_t) __builtin_neon_vminvq_s16((int8x16_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64x2_t __ret; \ - __ret = __s0 + vmull_s32(vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) +__ai uint8_t vminv_u8(uint8x8_t __p0) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vminv_u8((int8x8_t)__p0); + return __ret; +} #else -#define vmlal_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int64x2_t __ret; \ - __ret = __rev0 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai uint8_t vminv_u8(uint8x8_t __p0) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vminv_u8((int8x8_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = __s0 + vmull_s16(vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) +__ai uint32_t vminv_u32(uint32x2_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vminv_u32((int8x8_t)__p0); + return __ret; +} #else -#define vmlal_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __rev0 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai uint32_t vminv_u32(uint32x2_t __p0) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vminv_u32((int8x8_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ 
-#define vmlal_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x4_t __s2 = __p2; \ - uint64x2_t __ret; \ - __ret = __s0 + vmull_u32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) +__ai uint16_t vminv_u16(uint16x4_t __p0) { + uint16_t __ret; + __ret = (uint16_t) __builtin_neon_vminv_u16((int8x8_t)__p0); + return __ret; +} #else -#define vmlal_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x4_t __s2 = __p2; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - uint64x2_t __ret; \ - __ret = __rev0 + __noswap_vmull_u32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai uint16_t vminv_u16(uint16x4_t __p0) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16_t __ret; + __ret = (uint16_t) __builtin_neon_vminv_u16((int8x8_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ - uint32x4_t __ret; \ - __ret = __s0 + vmull_u16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) +__ai int8_t vminv_s8(int8x8_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vminv_s8((int8x8_t)__p0); + return __ret; +} #else -#define vmlal_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = __rev0 + __noswap_vmull_u16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int8_t vminv_s8(int8x8_t __p0) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8_t __ret; + __ret = (int8_t) __builtin_neon_vminv_s8((int8x8_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64x2_t __ret; \ - __ret = __s0 + vmull_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) +__ai float32_t vminv_f32(float32x2_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vminv_f32((int8x8_t)__p0); + return __ret; +} #else -#define vmlal_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int64x2_t __ret; \ - __ret = __rev0 + __noswap_vmull_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; 
\ -}) +__ai float32_t vminv_f32(float32x2_t __p0) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32_t __ret; + __ret = (float32_t) __builtin_neon_vminv_f32((int8x8_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = __s0 + vmull_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) +__ai int32_t vminv_s32(int32x2_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vminv_s32((int8x8_t)__p0); + return __ret; +} #else -#define vmlal_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __rev0 + __noswap_vmull_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int32_t vminv_s32(int32x2_t __p0) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32_t __ret; + __ret = (int32_t) __builtin_neon_vminv_s32((int8x8_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { +__ai int16_t vminv_s16(int16x4_t __p0) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vminv_s16((int8x8_t)__p0); + return __ret; +} +#else +__ai int16_t vminv_s16(int16x4_t __p0) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16_t __ret; + __ret = (int16_t) __builtin_neon_vminv_s16((int8x8_t)__rev0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float64x2_t vmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - __ret = __p0 - __p1 * __p2; + __ret = __p0 + __p1 * __p2; return __ret; } #else -__ai float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { +__ai float64x2_t vmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); float64x2_t __ret; - __ret = __rev0 - __rev1 * __rev2; + __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { +__ai float64x1_t vmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; - __ret = __p0 - __p1 * __p2; + __ret = __p0 + __p1 * __p2; return __ret; } #else -__ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { +__ai float64x1_t vmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; - __ret = __p0 - __p1 * __p2; + __ret = __p0 + __p1 * __p2; return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ 
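/* [editor's note -- annotation, not part of the generated header]
 * From here the diff pairs each removed vmls* definition (fused
 * multiply-subtract: accumulator - operand * lane) with an added vmla*
 * definition (fused multiply-accumulate: accumulator + operand * lane) at
 * the same spot. This is the regenerated header ordering its intrinsics
 * differently, not a change in what either intrinsic computes. A sketch of
 * the two idioms, with hypothetical 2-lane vectors a, b, a 4-lane vector c,
 * and a constant lane index i, all introduced only for illustration:
 *
 *   r = a + b * __builtin_shufflevector(c, c, i, i);   // vmla_laneq_* idiom
 *   r = a - b * __builtin_shufflevector(c, c, i, i);   // vmls_laneq_* idiom
 */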
uint32x4_t __s2 = __p2; \ uint32x4_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmlsq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ @@ -56419,23 +57308,23 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __s2 = __p2; \ uint16x8_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmlsq_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __s2 = __p2; \ @@ -56443,23 +57332,23 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __ret; \ - __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ float32x4_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmlsq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ @@ -56467,23 +57356,23 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ float32x4_t __ret; \ - __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 
2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ int32x4_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmlsq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ @@ -56491,23 +57380,23 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ int16x8_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmlsq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlaq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ @@ -56515,23 +57404,23 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __ret; \ - __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ uint32x2_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ __ret; \ }) #else -#define vmls_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ @@ -56539,23 +57428,23 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ uint32x2_t __ret; \ - __ret = __rev0 - __rev1 * 
__builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x8_t __s2 = __p2; \ uint16x4_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmls_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x8_t __s2 = __p2; \ @@ -56563,23 +57452,23 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __ret; \ - __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ float32x2_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ __ret; \ }) #else -#define vmls_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ @@ -56587,23 +57476,23 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ float32x2_t __ret; \ - __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ int32x2_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ __ret; \ }) #else -#define vmls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ @@ -56611,9790 +57500,9239 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32x2_t __ret; \ - __ret = __rev0 - __rev1 * 
__builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x4_t __ret; \ - __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ - __ret; \ -}) -#else -#define vmls_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmla_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __ret; \ - __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmlsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __ret; - __ret = __p0 - __p1 * (float64x2_t) {__p2, __p2}; - return __ret; -} -#else -__ai float64x2_t vmlsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __rev0 - __rev1 * (float64x2_t) {__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x2_t __s2 = __p2; \ - uint64x2_t __ret; \ - __ret = __s0 - vmull_u32(vget_high_u32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) -#else -#define vmlsl_high_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint64x2_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ - uint32x2_t __s2 = __p2; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - uint64x2_t __ret; \ - __ret = __rev0 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x4_t __s2 = __p2; \ - uint32x4_t __ret; \ - __ret = __s0 - vmull_u16(vget_high_u16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) -#else -#define vmlsl_high_lane_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x4_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = __rev0 - 
__noswap_vmull_u16(__noswap_vget_high_u16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int64x2_t __ret; \ - __ret = __s0 - vmull_s32(vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) -#else -#define vmlsl_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - int64x2_t __ret; \ - __ret = __rev0 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = __s0 - vmull_s16(vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) -#else -#define vmlsl_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __rev0 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __s0 + __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ + __ret; \ +}) +#else +#define vmla_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = __rev0 + __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ +__ai float64x2_t vmlaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { + float64x2_t __ret; + __ret = __p0 + __p1 * (float64x2_t) {__p2, __p2}; + return __ret; +} +#else +__ai float64x2_t vmlaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __ret; + __ret = __rev0 + __rev1 * (float64x2_t) {__p2, __p2}; + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlal_high_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint64x2_t __s0 = __p0; \ uint32x4_t 
__s1 = __p1; \ - uint32x4_t __s2 = __p2; \ + uint32x2_t __s2 = __p2; \ uint64x2_t __ret; \ - __ret = __s0 - vmull_u32(vget_high_u32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret = __s0 + vmull_u32(vget_high_u32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vmlsl_high_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint64x2_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __s2 = __p2; \ + uint32x2_t __s2 = __p2; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ uint64x2_t __ret; \ - __ret = __rev0 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __rev0 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_lane_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ + uint16x4_t __s2 = __p2; \ uint32x4_t __ret; \ - __ret = __s0 - vmull_u16(vget_high_u16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret = __s0 + vmull_u16(vget_high_u16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vmlsl_high_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_lane_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __s2 = __p2; \ + uint16x4_t __s2 = __p2; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = __rev0 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __rev0 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ + int32x2_t __s2 = __p2; \ int64x2_t __ret; \ - __ret = __s0 - vmull_s32(vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret = __s0 + vmull_s32(vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vmlsl_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ + int32x2_t __s2 = __p2; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 
0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ int64x2_t __ret; \ - __ret = __rev0 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __rev0 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ + int16x4_t __s2 = __p2; \ int32x4_t __ret; \ - __ret = __s0 - vmull_s16(vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret = __s0 + vmull_s16(vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vmlsl_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ + int16x4_t __s2 = __p2; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = __rev0 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __rev0 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint64x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ + uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ uint64x2_t __ret; \ - __ret = __s0 - vmull_u32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret = __s0 + vmull_u32(vget_high_u32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vmlsl_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint64x2_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ + uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ uint64x2_t __ret; \ - __ret = __rev0 - __noswap_vmull_u32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __rev0 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ 
+#define vmlal_high_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ + uint16x8_t __s1 = __p1; \ uint16x8_t __s2 = __p2; \ uint32x4_t __ret; \ - __ret = __s0 - vmull_u16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret = __s0 + vmull_u16(vget_high_u16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vmlsl_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ + uint16x8_t __s1 = __p1; \ uint16x8_t __s2 = __p2; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = __rev0 - __noswap_vmull_u16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __rev0 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ + int32x4_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ int64x2_t __ret; \ - __ret = __s0 - vmull_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret = __s0 + vmull_s32(vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vmlsl_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ + int32x4_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int64x2_t __ret; \ - __ret = __rev0 - __noswap_vmull_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __rev0 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ + int16x8_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ int32x4_t __ret; \ - __ret = __s0 - vmull_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret = __s0 + vmull_s16(vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vmlsl_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ +#define vmlal_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ + int16x8_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, 
__s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = __rev0 - __noswap_vmull_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __rev0 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai poly64x1_t vmov_n_p64(poly64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t) {__p0}; - return __ret; -} -#else -__ai poly64x1_t vmov_n_p64(poly64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t) {__p0}; - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly64x2_t vmovq_n_p64(poly64_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t) {__p0, __p0}; - return __ret; -} -#else -__ai poly64x2_t vmovq_n_p64(poly64_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmovq_n_f64(float64_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) {__p0, __p0}; - return __ret; -} -#else -__ai float64x2_t vmovq_n_f64(float64_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vmov_n_f64(float64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) {__p0}; - return __ret; -} -#else -__ai float64x1_t vmov_n_f64(float64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) {__p0}; - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_124) { - uint16x8_t __ret_124; - uint8x8_t __a1_124 = vget_high_u8(__p0_124); - __ret_124 = (uint16x8_t)(vshll_n_u8(__a1_124, 0)); - return __ret_124; -} -#else -__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_125) { - uint8x16_t __rev0_125; __rev0_125 = __builtin_shufflevector(__p0_125, __p0_125, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret_125; - uint8x8_t __a1_125 = __noswap_vget_high_u8(__rev0_125); - __ret_125 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_125, 0)); - __ret_125 = __builtin_shufflevector(__ret_125, __ret_125, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret_125; -} -__ai uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_126) { - uint16x8_t __ret_126; - uint8x8_t __a1_126 = __noswap_vget_high_u8(__p0_126); - __ret_126 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_126, 0)); - return __ret_126; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_127) { - uint64x2_t __ret_127; - uint32x2_t __a1_127 = vget_high_u32(__p0_127); - __ret_127 = (uint64x2_t)(vshll_n_u32(__a1_127, 0)); - return __ret_127; -} -#else -__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_128) { - uint32x4_t __rev0_128; __rev0_128 = __builtin_shufflevector(__p0_128, __p0_128, 3, 2, 1, 0); - uint64x2_t __ret_128; - uint32x2_t __a1_128 = __noswap_vget_high_u32(__rev0_128); - __ret_128 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_128, 0)); - __ret_128 = __builtin_shufflevector(__ret_128, __ret_128, 1, 0); - return __ret_128; -} -__ai uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_129) { - uint64x2_t __ret_129; - uint32x2_t __a1_129 = 
__noswap_vget_high_u32(__p0_129); - __ret_129 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_129, 0)); - return __ret_129; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_130) { - uint32x4_t __ret_130; - uint16x4_t __a1_130 = vget_high_u16(__p0_130); - __ret_130 = (uint32x4_t)(vshll_n_u16(__a1_130, 0)); - return __ret_130; -} -#else -__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_131) { - uint16x8_t __rev0_131; __rev0_131 = __builtin_shufflevector(__p0_131, __p0_131, 7, 6, 5, 4, 3, 2, 1, 0); - uint32x4_t __ret_131; - uint16x4_t __a1_131 = __noswap_vget_high_u16(__rev0_131); - __ret_131 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_131, 0)); - __ret_131 = __builtin_shufflevector(__ret_131, __ret_131, 3, 2, 1, 0); - return __ret_131; -} -__ai uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_132) { - uint32x4_t __ret_132; - uint16x4_t __a1_132 = __noswap_vget_high_u16(__p0_132); - __ret_132 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_132, 0)); - return __ret_132; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vmovl_high_s8(int8x16_t __p0_133) { - int16x8_t __ret_133; - int8x8_t __a1_133 = vget_high_s8(__p0_133); - __ret_133 = (int16x8_t)(vshll_n_s8(__a1_133, 0)); - return __ret_133; -} -#else -__ai int16x8_t vmovl_high_s8(int8x16_t __p0_134) { - int8x16_t __rev0_134; __rev0_134 = __builtin_shufflevector(__p0_134, __p0_134, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret_134; - int8x8_t __a1_134 = __noswap_vget_high_s8(__rev0_134); - __ret_134 = (int16x8_t)(__noswap_vshll_n_s8(__a1_134, 0)); - __ret_134 = __builtin_shufflevector(__ret_134, __ret_134, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret_134; -} -__ai int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_135) { - int16x8_t __ret_135; - int8x8_t __a1_135 = __noswap_vget_high_s8(__p0_135); - __ret_135 = (int16x8_t)(__noswap_vshll_n_s8(__a1_135, 0)); - return __ret_135; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vmovl_high_s32(int32x4_t __p0_136) { - int64x2_t __ret_136; - int32x2_t __a1_136 = vget_high_s32(__p0_136); - __ret_136 = (int64x2_t)(vshll_n_s32(__a1_136, 0)); - return __ret_136; -} -#else -__ai int64x2_t vmovl_high_s32(int32x4_t __p0_137) { - int32x4_t __rev0_137; __rev0_137 = __builtin_shufflevector(__p0_137, __p0_137, 3, 2, 1, 0); - int64x2_t __ret_137; - int32x2_t __a1_137 = __noswap_vget_high_s32(__rev0_137); - __ret_137 = (int64x2_t)(__noswap_vshll_n_s32(__a1_137, 0)); - __ret_137 = __builtin_shufflevector(__ret_137, __ret_137, 1, 0); - return __ret_137; -} -__ai int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_138) { - int64x2_t __ret_138; - int32x2_t __a1_138 = __noswap_vget_high_s32(__p0_138); - __ret_138 = (int64x2_t)(__noswap_vshll_n_s32(__a1_138, 0)); - return __ret_138; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vmovl_high_s16(int16x8_t __p0_139) { - int32x4_t __ret_139; - int16x4_t __a1_139 = vget_high_s16(__p0_139); - __ret_139 = (int32x4_t)(vshll_n_s16(__a1_139, 0)); - return __ret_139; -} -#else -__ai int32x4_t vmovl_high_s16(int16x8_t __p0_140) { - int16x8_t __rev0_140; __rev0_140 = __builtin_shufflevector(__p0_140, __p0_140, 7, 6, 5, 4, 3, 2, 1, 0); - int32x4_t __ret_140; - int16x4_t __a1_140 = __noswap_vget_high_s16(__rev0_140); - __ret_140 = (int32x4_t)(__noswap_vshll_n_s16(__a1_140, 0)); - __ret_140 = __builtin_shufflevector(__ret_140, __ret_140, 3, 2, 1, 0); - return __ret_140; -} -__ai int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_141) { - int32x4_t __ret_141; - int16x4_t __a1_141 = __noswap_vget_high_s16(__p0_141); - __ret_141 = 
(int32x4_t)(__noswap_vshll_n_s16(__a1_141, 0)); - return __ret_141; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { - uint16x8_t __ret; - __ret = vcombine_u16(__p0, vmovn_u32(__p1)); - return __ret; -} -#else -__ai uint16x8_t vmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = __noswap_vcombine_u16(__rev0, __noswap_vmovn_u32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { - uint32x4_t __ret; - __ret = vcombine_u32(__p0, vmovn_u64(__p1)); - return __ret; -} -#else -__ai uint32x4_t vmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x4_t __ret; - __ret = __noswap_vcombine_u32(__rev0, __noswap_vmovn_u64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { - uint8x16_t __ret; - __ret = vcombine_u8(__p0, vmovn_u16(__p1)); - return __ret; -} -#else -__ai uint8x16_t vmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = __noswap_vcombine_u8(__rev0, __noswap_vmovn_u16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { - int16x8_t __ret; - __ret = vcombine_s16(__p0, vmovn_s32(__p1)); - return __ret; -} -#else -__ai int16x8_t vmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x8_t __ret; - __ret = __noswap_vcombine_s16(__rev0, __noswap_vmovn_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { - int32x4_t __ret; - __ret = vcombine_s32(__p0, vmovn_s64(__p1)); - return __ret; -} -#else -__ai int32x4_t vmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x4_t __ret; - __ret = __noswap_vcombine_s32(__rev0, __noswap_vmovn_s64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { - int8x16_t __ret; - __ret = vcombine_s8(__p0, vmovn_s16(__p1)); - return __ret; -} -#else -__ai int8x16_t vmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = 
__noswap_vcombine_s8(__rev0, __noswap_vmovn_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmulq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = __p0 * __p1; - return __ret; -} -#else -__ai float64x2_t vmulq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 * __p1; - return __ret; -} -#else -__ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 * __p1; - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmuld_lane_f64(__p0_142, __p1_142, __p2_142) __extension__ ({ \ - float64_t __s0_142 = __p0_142; \ - float64x1_t __s1_142 = __p1_142; \ - float64_t __ret_142; \ - __ret_142 = __s0_142 * vget_lane_f64(__s1_142, __p2_142); \ - __ret_142; \ -}) -#else -#define vmuld_lane_f64(__p0_143, __p1_143, __p2_143) __extension__ ({ \ - float64_t __s0_143 = __p0_143; \ - float64x1_t __s1_143 = __p1_143; \ - float64_t __ret_143; \ - __ret_143 = __s0_143 * __noswap_vget_lane_f64(__s1_143, __p2_143); \ - __ret_143; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmuls_lane_f32(__p0_144, __p1_144, __p2_144) __extension__ ({ \ - float32_t __s0_144 = __p0_144; \ - float32x2_t __s1_144 = __p1_144; \ - float32_t __ret_144; \ - __ret_144 = __s0_144 * vget_lane_f32(__s1_144, __p2_144); \ - __ret_144; \ -}) -#else -#define vmuls_lane_f32(__p0_145, __p1_145, __p2_145) __extension__ ({ \ - float32_t __s0_145 = __p0_145; \ - float32x2_t __s1_145 = __p1_145; \ - float32x2_t __rev1_145; __rev1_145 = __builtin_shufflevector(__s1_145, __s1_145, 1, 0); \ - float32_t __ret_145; \ - __ret_145 = __s0_145 * __noswap_vget_lane_f32(__rev1_145, __p2_145); \ - __ret_145; \ +#define vmlal_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ + uint32x4_t __s2 = __p2; \ + uint64x2_t __ret; \ + __ret = __s0 + vmull_u32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret; \ +}) +#else +#define vmlal_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ + uint32x4_t __s2 = __p2; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + uint64x2_t __ret; \ + __ret = __rev0 + __noswap_vmull_u32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ +#define vmlal_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x4_t __s1 = __p1; \ + uint16x8_t __s2 = __p2; \ + uint32x4_t __ret; \ + __ret = __s0 + vmull_u16(__s1, __builtin_shufflevector(__s2, 
__s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vmul_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ +#define vmlal_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x4_t __s1 = __p1; \ + uint16x8_t __s2 = __p2; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret; \ + __ret = __rev0 + __noswap_vmull_u16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x2_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ +#define vmlal_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64x2_t __ret; \ + __ret = __s0 + vmull_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vmulq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ +#define vmlal_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int64x2_t __ret; \ + __ret = __rev0 + __noswap_vmull_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmuld_laneq_f64(__p0_146, __p1_146, __p2_146) __extension__ ({ \ - float64_t __s0_146 = __p0_146; \ - float64x2_t __s1_146 = __p1_146; \ - float64_t __ret_146; \ - __ret_146 = __s0_146 * vgetq_lane_f64(__s1_146, __p2_146); \ - __ret_146; \ +#define vmlal_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32x4_t __ret; \ + __ret = __s0 + vmull_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret; \ }) #else -#define vmuld_laneq_f64(__p0_147, __p1_147, __p2_147) __extension__ ({ \ - float64_t __s0_147 = __p0_147; \ - float64x2_t __s1_147 = __p1_147; \ - float64x2_t __rev1_147; __rev1_147 = __builtin_shufflevector(__s1_147, __s1_147, 1, 0); \ - float64_t __ret_147; \ - __ret_147 = __s0_147 * __noswap_vgetq_lane_f64(__rev1_147, __p2_147); \ - __ret_147; \ +#define vmlal_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x8_t __rev2; __rev2 = 
__builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __rev0 + __noswap_vmull_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmuls_laneq_f32(__p0_148, __p1_148, __p2_148) __extension__ ({ \ - float32_t __s0_148 = __p0_148; \ - float32x4_t __s1_148 = __p1_148; \ - float32_t __ret_148; \ - __ret_148 = __s0_148 * vgetq_lane_f32(__s1_148, __p2_148); \ - __ret_148; \ -}) +__ai float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = __p0 - __p1 * __p2; + return __ret; +} #else -#define vmuls_laneq_f32(__p0_149, __p1_149, __p2_149) __extension__ ({ \ - float32_t __s0_149 = __p0_149; \ - float32x4_t __s1_149 = __p1_149; \ - float32x4_t __rev1_149; __rev1_149 = __builtin_shufflevector(__s1_149, __s1_149, 3, 2, 1, 0); \ - float32_t __ret_149; \ - __ret_149 = __s0_149 * __noswap_vgetq_lane_f32(__rev1_149, __p2_149); \ - __ret_149; \ -}) +__ai float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __ret; + __ret = __rev0 - __rev1 * __rev2; + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 10); \ - __ret; \ -}) +__ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = __p0 - __p1 * __p2; + return __ret; +} #else -#define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__rev1, __p2, 10); \ - __ret; \ -}) +__ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = __p0 - __p1 * __p2; + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ + uint32x4_t __s2 = __p2; \ uint32x4_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmulq_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ + uint32x4_t __s2 = __p2; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret 
= __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ + uint16x8_t __s2 = __p2; \ uint16x8_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmulq_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ + uint16x8_t __s2 = __p2; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ - __ret; \ -}) -#else -#define vmulq_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float64x2_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ float32x4_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmulq_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ float32x4_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ int32x4_t __ret; \ - __ret = __s0 * 
__builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmulq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ int16x8_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmulq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vmlsq_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x2_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ + uint32x2_t __s1 = __p1; \ + uint32x4_t __s2 = __p2; \ uint32x2_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ __ret; \ }) #else -#define vmul_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x2_t __s0 = __p0; \ - uint32x4_t __s1 = __p1; \ + uint32x2_t __s1 = __p1; \ + uint32x4_t __s2 = __p2; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ uint32x2_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_u16(__p0, 
__p1, __p2, __p3) __extension__ ({ \ uint16x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ + uint16x4_t __s1 = __p1; \ + uint16x8_t __s2 = __p2; \ uint16x4_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmul_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ uint16x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ + uint16x4_t __s1 = __p1; \ + uint16x8_t __s2 = __p2; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ + float32x2_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ float32x2_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ __ret; \ }) #else -#define vmul_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __s0 = __p0; \ - float32x4_t __s1 = __p1; \ + float32x2_t __s1 = __p1; \ + float32x4_t __s2 = __p2; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ float32x2_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ + int32x2_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ int32x2_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3); \ __ret; \ }) #else -#define vmul_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int32x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ + int32x2_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32x2_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2); \ + __ret 
= __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int16x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ int16x4_t __ret; \ - __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret = __s0 - __s1 * __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3); \ __ret; \ }) #else -#define vmul_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vmls_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int16x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __ret; \ - __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __rev0 - __rev1 * __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vmul_n_f64(float64x1_t __p0, float64_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmul_n_f64((int8x8_t)__p0, __p1); - return __ret; -} -#else -__ai float64x1_t vmul_n_f64(float64x1_t __p0, float64_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmul_n_f64((int8x8_t)__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { +__ai float64x2_t vmlsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __ret; - __ret = __p0 * (float64x2_t) {__p1, __p1}; + __ret = __p0 - __p1 * (float64x2_t) {__p2, __p2}; return __ret; } #else -__ai float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { +__ai float64x2_t vmlsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __ret; - __ret = __rev0 * (float64x2_t) {__p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { - poly128_t __ret; - __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); - return __ret; -} -#else -__ai poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { - poly128_t __ret; - __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); - return __ret; -} -__ai poly128_t __noswap_vmull_p64(poly64_t __p0, poly64_t __p1) { - poly128_t __ret; - __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly16x8_t vmull_high_p8(poly8x16_t __p0, poly8x16_t __p1) { - poly16x8_t __ret; - __ret = vmull_p8(vget_high_p8(__p0), vget_high_p8(__p1)); - return __ret; -} -#else -__ai poly16x8_t vmull_high_p8(poly8x16_t __p0, poly8x16_t __p1) { - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __ret; - __ret = __noswap_vmull_p8(__noswap_vget_high_p8(__rev0), __noswap_vget_high_p8(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vmull_high_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint16x8_t __ret; - __ret = vmull_u8(vget_high_u8(__p0), vget_high_u8(__p1)); - return __ret; -} -#else -__ai uint16x8_t vmull_high_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = __noswap_vmull_u8(__noswap_vget_high_u8(__rev0), __noswap_vget_high_u8(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vmull_high_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint64x2_t __ret; - __ret = vmull_u32(vget_high_u32(__p0), vget_high_u32(__p1)); - return __ret; -} -#else -__ai uint64x2_t vmull_high_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint64x2_t __ret; - __ret = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0), __noswap_vget_high_u32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vmull_high_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint32x4_t __ret; - __ret = vmull_u16(vget_high_u16(__p0), vget_high_u16(__p1)); - return __ret; -} -#else -__ai uint32x4_t vmull_high_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0), __noswap_vget_high_u16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vmull_high_s8(int8x16_t __p0, int8x16_t __p1) { - int16x8_t __ret; - __ret = vmull_s8(vget_high_s8(__p0), vget_high_s8(__p1)); - return __ret; -} -#else -__ai int16x8_t vmull_high_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret; - __ret = __noswap_vmull_s8(__noswap_vget_high_s8(__rev0), __noswap_vget_high_s8(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vmull_high_s32(int32x4_t __p0, int32x4_t __p1) { - int64x2_t __ret; - __ret = vmull_s32(vget_high_s32(__p0), vget_high_s32(__p1)); - return __ret; -} -#else -__ai int64x2_t vmull_high_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int64x2_t __ret; - __ret = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0), 
__noswap_vget_high_s32(__rev1)); + __ret = __rev0 - __rev1 * (float64x2_t) {__p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vmull_high_s16(int16x8_t __p0, int16x8_t __p1) { - int32x4_t __ret; - __ret = vmull_s16(vget_high_s16(__p0), vget_high_s16(__p1)); - return __ret; -} -#else -__ai int32x4_t vmull_high_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int32x4_t __ret; - __ret = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { - poly128_t __ret; - __ret = vmull_p64((poly64_t)(vget_high_p64(__p0)), (poly64_t)(vget_high_p64(__p1))); - return __ret; -} -#else -__ai poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - poly128_t __ret; - __ret = __noswap_vmull_p64((poly64_t)(__noswap_vget_high_p64(__rev0)), (poly64_t)(__noswap_vget_high_p64(__rev1))); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ +#define vmlsl_high_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x2_t __s2 = __p2; \ uint64x2_t __ret; \ - __ret = vmull_u32(vget_high_u32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = __s0 - vmull_u32(vget_high_u32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vmull_high_lane_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ - uint32x2_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ +#define vmlsl_high_lane_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x2_t __s2 = __p2; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ uint64x2_t __ret; \ - __ret = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __rev0 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ +#define vmlsl_high_lane_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x4_t __s2 = __p2; \ uint32x4_t __ret; \ - __ret = vmull_u16(vget_high_u16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret = __s0 - vmull_u16(vget_high_u16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vmull_high_lane_u16(__p0, __p1, 
__p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ - uint16x4_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ +#define vmlsl_high_lane_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x4_t __s2 = __p2; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __rev0 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ +#define vmlsl_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ int64x2_t __ret; \ - __ret = vmull_s32(vget_high_s32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = __s0 - vmull_s32(vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vmull_high_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ +#define vmlsl_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ int64x2_t __ret; \ - __ret = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __rev0 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ +#define vmlsl_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ int32x4_t __ret; \ - __ret = vmull_s16(vget_high_s16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret = __s0 - vmull_s16(vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vmull_high_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ +#define vmlsl_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + 
int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __rev0 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ +#define vmlsl_high_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ + uint32x4_t __s2 = __p2; \ uint64x2_t __ret; \ - __ret = vmull_u32(vget_high_u32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = __s0 - vmull_u32(vget_high_u32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vmull_high_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ - uint32x4_t __s0 = __p0; \ +#define vmlsl_high_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint64x2_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x4_t __s2 = __p2; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ uint64x2_t __ret; \ - __ret = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __rev0 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ +#define vmlsl_high_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ + uint16x8_t __s2 = __p2; \ uint32x4_t __ret; \ - __ret = vmull_u16(vget_high_u16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret = __s0 - vmull_u16(vget_high_u16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vmull_high_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x8_t __s0 = __p0; \ +#define vmlsl_high_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __s2 = __p2; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __rev0 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ 
-#define vmull_high_laneq_s32(__p0, __p1, __p2) __extension__ ({ \
-  int32x4_t __s0 = __p0; \
+#define vmlsl_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int64x2_t __s0 = __p0; \
   int32x4_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
   int64x2_t __ret; \
-  __ret = vmull_s32(vget_high_s32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret = __s0 - vmull_s32(vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \
   __ret; \
 })
 #else
-#define vmull_high_laneq_s32(__p0, __p1, __p2) __extension__ ({ \
-  int32x4_t __s0 = __p0; \
+#define vmlsl_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int64x2_t __s0 = __p0; \
   int32x4_t __s1 = __p1; \
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x4_t __s2 = __p2; \
+  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
   int64x2_t __ret; \
-  __ret = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __rev0 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   __ret; \
 })
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-#define vmull_high_laneq_s16(__p0, __p1, __p2) __extension__ ({ \
-  int16x8_t __s0 = __p0; \
+#define vmlsl_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
   int16x8_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
   int32x4_t __ret; \
-  __ret = vmull_s16(vget_high_s16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
+  __ret = __s0 - vmull_s16(vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \
   __ret; \
 })
 #else
-#define vmull_high_laneq_s16(__p0, __p1, __p2) __extension__ ({ \
-  int16x8_t __s0 = __p0; \
+#define vmlsl_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
   int16x8_t __s1 = __p1; \
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __s2 = __p2; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
   int32x4_t __ret; \
-  __ret = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __rev0 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   __ret; \
 })
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint64x2_t vmull_high_n_u32(uint32x4_t __p0, uint32_t __p1) {
-  uint64x2_t __ret;
-  __ret = vmull_n_u32(vget_high_u32(__p0), __p1);
+#define vmlsl_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint64x2_t __s0 = __p0; \
+  uint32x2_t __s1 = __p1; \
+  uint32x4_t __s2 = __p2; \
+  uint64x2_t __ret; \
+  __ret = __s0 - vmull_u32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \
+  __ret; \
+})
+#else
+#define vmlsl_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint64x2_t __s0 = __p0; \
+  uint32x2_t __s1 = __p1; \
+  uint32x4_t __s2 = __p2; \
+  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+  uint64x2_t __ret; \
+  __ret = __rev0 - __noswap_vmull_u32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmlsl_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint16x4_t __s1 = __p1; \
+  uint16x8_t __s2 = __p2; \
+  uint32x4_t __ret; \
+  __ret = __s0 - vmull_u16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \
+  __ret; \
+})
+#else
+#define vmlsl_laneq_u16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint16x4_t __s1 = __p1; \
+  uint16x8_t __s2 = __p2; \
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint32x4_t __ret; \
+  __ret = __rev0 - __noswap_vmull_u16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmlsl_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int64x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int64x2_t __ret; \
+  __ret = __s0 - vmull_s32(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \
+  __ret; \
+})
+#else
+#define vmlsl_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int64x2_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x4_t __s2 = __p2; \
+  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+  int64x2_t __ret; \
+  __ret = __rev0 - __noswap_vmull_s32(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmlsl_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int32x4_t __ret; \
+  __ret = __s0 - vmull_s16(__s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \
+  __ret; \
+})
+#else
+#define vmlsl_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x8_t __s2 = __p2; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+  __ret = __rev0 - __noswap_vmull_s16(__rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai poly64x1_t vmov_n_p64(poly64_t __p0) {
+  poly64x1_t __ret;
+  __ret = (poly64x1_t) {__p0};
   return __ret;
 }
 #else
-__ai uint64x2_t vmull_high_n_u32(uint32x4_t __p0, uint32_t __p1) {
-  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint64x2_t __ret;
-  __ret = __noswap_vmull_n_u32(__noswap_vget_high_u32(__rev0), __p1);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai poly64x1_t vmov_n_p64(poly64_t __p0) {
+  poly64x1_t __ret;
+  __ret = (poly64x1_t) {__p0};
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vmull_high_n_u16(uint16x8_t __p0, uint16_t __p1) {
-  uint32x4_t __ret;
-  __ret = vmull_n_u16(vget_high_u16(__p0), __p1);
+__ai poly64x2_t vmovq_n_p64(poly64_t __p0) {
+  poly64x2_t __ret;
+  __ret = (poly64x2_t) {__p0, __p0};
   return __ret;
 }
 #else
-__ai uint32x4_t vmull_high_n_u16(uint16x8_t __p0, uint16_t __p1) {
-  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = __noswap_vmull_n_u16(__noswap_vget_high_u16(__rev0), __p1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai poly64x2_t vmovq_n_p64(poly64_t __p0) {
+  poly64x2_t __ret;
+  __ret = (poly64x2_t) {__p0, __p0};
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int64x2_t vmull_high_n_s32(int32x4_t __p0, int32_t __p1) {
-  int64x2_t __ret;
-  __ret = vmull_n_s32(vget_high_s32(__p0), __p1);
+__ai float64x2_t vmovq_n_f64(float64_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) {__p0, __p0};
   return __ret;
 }
 #else
-__ai int64x2_t vmull_high_n_s32(int32x4_t __p0, int32_t __p1) {
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int64x2_t __ret;
-  __ret = __noswap_vmull_n_s32(__noswap_vget_high_s32(__rev0), __p1);
+__ai float64x2_t vmovq_n_f64(float64_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) {__p0, __p0};
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int32x4_t vmull_high_n_s16(int16x8_t __p0, int16_t __p1) {
-  int32x4_t __ret;
-  __ret = vmull_n_s16(vget_high_s16(__p0), __p1);
+__ai float64x1_t vmov_n_f64(float64_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) {__p0};
   return __ret;
 }
 #else
-__ai int32x4_t vmull_high_n_s16(int16x8_t __p0, int16_t __p1) {
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int32x4_t __ret;
-  __ret = __noswap_vmull_n_s16(__noswap_vget_high_s16(__rev0), __p1);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64x1_t vmov_n_f64(float64_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) {__p0};
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-#define vmull_laneq_u32(__p0, __p1, __p2) __extension__ ({ \
-  uint32x2_t __s0 = __p0; \
-  uint32x4_t __s1 = __p1; \
-  uint64x2_t __ret; \
-  __ret = vmull_u32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
-  __ret; \
-})
+__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_124) {
+  uint16x8_t __ret_124;
+  uint8x8_t __a1_124 = vget_high_u8(__p0_124);
+  __ret_124 = (uint16x8_t)(vshll_n_u8(__a1_124, 0));
+  return __ret_124;
+}
 #else
-#define vmull_laneq_u32(__p0, __p1, __p2) __extension__ ({ \
-  uint32x2_t __s0 = __p0; \
-  uint32x4_t __s1 = __p1; \
-  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
-  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  uint64x2_t __ret; \
-  __ret = __noswap_vmull_u32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
-  __ret; \
-})
+__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_125) {
+  uint8x16_t __rev0_125; __rev0_125 = __builtin_shufflevector(__p0_125, __p0_125, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret_125;
+  uint8x8_t __a1_125 = __noswap_vget_high_u8(__rev0_125);
+  __ret_125 =
(uint16x8_t)(__noswap_vshll_n_u8(__a1_125, 0)); + __ret_125 = __builtin_shufflevector(__ret_125, __ret_125, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret_125; +} +__ai uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_126) { + uint16x8_t __ret_126; + uint8x8_t __a1_126 = __noswap_vget_high_u8(__p0_126); + __ret_126 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_126, 0)); + return __ret_126; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint32x4_t __ret; \ - __ret = vmull_u16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ - __ret; \ -}) +__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_127) { + uint64x2_t __ret_127; + uint32x2_t __a1_127 = vget_high_u32(__p0_127); + __ret_127 = (uint64x2_t)(vshll_n_u32(__a1_127, 0)); + return __ret_127; +} #else -#define vmull_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ - uint16x4_t __s0 = __p0; \ - uint16x8_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret; \ - __ret = __noswap_vmull_u16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_128) { + uint32x4_t __rev0_128; __rev0_128 = __builtin_shufflevector(__p0_128, __p0_128, 3, 2, 1, 0); + uint64x2_t __ret_128; + uint32x2_t __a1_128 = __noswap_vget_high_u32(__rev0_128); + __ret_128 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_128, 0)); + __ret_128 = __builtin_shufflevector(__ret_128, __ret_128, 1, 0); + return __ret_128; +} +__ai uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_129) { + uint64x2_t __ret_129; + uint32x2_t __a1_129 = __noswap_vget_high_u32(__p0_129); + __ret_129 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_129, 0)); + return __ret_129; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int64x2_t __ret; \ - __ret = vmull_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ - __ret; \ -}) +__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_130) { + uint32x4_t __ret_130; + uint16x4_t __a1_130 = vget_high_u16(__p0_130); + __ret_130 = (uint32x4_t)(vshll_n_u16(__a1_130, 0)); + return __ret_130; +} #else -#define vmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int64x2_t __ret; \ - __ret = __noswap_vmull_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_131) { + uint16x8_t __rev0_131; __rev0_131 = __builtin_shufflevector(__p0_131, __p0_131, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __ret_131; + uint16x4_t __a1_131 = __noswap_vget_high_u16(__rev0_131); + __ret_131 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_131, 0)); + __ret_131 = __builtin_shufflevector(__ret_131, __ret_131, 3, 2, 1, 0); + return __ret_131; +} +__ai uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_132) { + uint32x4_t __ret_132; + uint16x4_t __a1_132 = __noswap_vget_high_u16(__p0_132); + __ret_132 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_132, 0)); + return __ret_132; +} 
#endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int32x4_t __ret; \ - __ret = vmull_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ - __ret; \ -}) +__ai int16x8_t vmovl_high_s8(int8x16_t __p0_133) { + int16x8_t __ret_133; + int8x8_t __a1_133 = vget_high_s8(__p0_133); + __ret_133 = (int16x8_t)(vshll_n_s8(__a1_133, 0)); + return __ret_133; +} #else -#define vmull_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vmull_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int16x8_t vmovl_high_s8(int8x16_t __p0_134) { + int8x16_t __rev0_134; __rev0_134 = __builtin_shufflevector(__p0_134, __p0_134, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret_134; + int8x8_t __a1_134 = __noswap_vget_high_s8(__rev0_134); + __ret_134 = (int16x8_t)(__noswap_vshll_n_s8(__a1_134, 0)); + __ret_134 = __builtin_shufflevector(__ret_134, __ret_134, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret_134; +} +__ai int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_135) { + int16x8_t __ret_135; + int8x8_t __a1_135 = __noswap_vget_high_s8(__p0_135); + __ret_135 = (int16x8_t)(__noswap_vshll_n_s8(__a1_135, 0)); + return __ret_135; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int64x2_t vmovl_high_s32(int32x4_t __p0_136) { + int64x2_t __ret_136; + int32x2_t __a1_136 = vget_high_s32(__p0_136); + __ret_136 = (int64x2_t)(vshll_n_s32(__a1_136, 0)); + return __ret_136; +} +#else +__ai int64x2_t vmovl_high_s32(int32x4_t __p0_137) { + int32x4_t __rev0_137; __rev0_137 = __builtin_shufflevector(__p0_137, __p0_137, 3, 2, 1, 0); + int64x2_t __ret_137; + int32x2_t __a1_137 = __noswap_vget_high_s32(__rev0_137); + __ret_137 = (int64x2_t)(__noswap_vshll_n_s32(__a1_137, 0)); + __ret_137 = __builtin_shufflevector(__ret_137, __ret_137, 1, 0); + return __ret_137; +} +__ai int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_138) { + int64x2_t __ret_138; + int32x2_t __a1_138 = __noswap_vget_high_s32(__p0_138); + __ret_138 = (int64x2_t)(__noswap_vshll_n_s32(__a1_138, 0)); + return __ret_138; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32x4_t vmovl_high_s16(int16x8_t __p0_139) { + int32x4_t __ret_139; + int16x4_t __a1_139 = vget_high_s16(__p0_139); + __ret_139 = (int32x4_t)(vshll_n_s16(__a1_139, 0)); + return __ret_139; +} +#else +__ai int32x4_t vmovl_high_s16(int16x8_t __p0_140) { + int16x8_t __rev0_140; __rev0_140 = __builtin_shufflevector(__p0_140, __p0_140, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret_140; + int16x4_t __a1_140 = __noswap_vget_high_s16(__rev0_140); + __ret_140 = (int32x4_t)(__noswap_vshll_n_s16(__a1_140, 0)); + __ret_140 = __builtin_shufflevector(__ret_140, __ret_140, 3, 2, 1, 0); + return __ret_140; +} +__ai int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_141) { + int32x4_t __ret_141; + int16x4_t __a1_141 = __noswap_vget_high_s16(__p0_141); + __ret_141 = (int32x4_t)(__noswap_vshll_n_s16(__a1_141, 0)); + return __ret_141; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint16x8_t vmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { + uint16x8_t __ret; + __ret = vcombine_u16(__p0, vmovn_u32(__p1)); + return __ret; +} +#else +__ai 
uint16x8_t vmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = __noswap_vcombine_u16(__rev0, __noswap_vmovn_u32(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32x4_t vmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { + uint32x4_t __ret; + __ret = vcombine_u32(__p0, vmovn_u64(__p1)); + return __ret; +} +#else +__ai uint32x4_t vmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x4_t __ret; + __ret = __noswap_vcombine_u32(__rev0, __noswap_vmovn_u64(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); +__ai uint8x16_t vmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { + uint8x16_t __ret; + __ret = vcombine_u8(__p0, vmovn_u16(__p1)); return __ret; } #else -__ai float64x2_t vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -__ai float64x2_t __noswap_vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); +__ai uint8x16_t vmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = __noswap_vcombine_u8(__rev0, __noswap_vmovn_u16(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); +__ai int16x8_t vmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { + int16x8_t __ret; + __ret = vcombine_s16(__p0, vmovn_s32(__p1)); return __ret; } #else -__ai float32x4_t vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -__ai float32x4_t __noswap_vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); +__ai int16x8_t vmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
3, 2, 1, 0); + int16x8_t __ret; + __ret = __noswap_vcombine_s16(__rev0, __noswap_vmovn_s32(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vmulx_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 10); +__ai int32x4_t vmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { + int32x4_t __ret; + __ret = vcombine_s32(__p0, vmovn_s64(__p1)); return __ret; } #else -__ai float64x1_t vmulx_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 10); +__ai int32x4_t vmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x4_t __ret; + __ret = __noswap_vcombine_s32(__rev0, __noswap_vmovn_s64(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 9); +__ai int8x16_t vmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { + int8x16_t __ret; + __ret = vcombine_s8(__p0, vmovn_s16(__p1)); return __ret; } #else -__ai float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -__ai float32x2_t __noswap_vmulx_f32(float32x2_t __p0, float32x2_t __p1) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 9); +__ai int8x16_t vmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = __noswap_vcombine_s8(__rev0, __noswap_vmovn_s16(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64_t vmulxd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1); +__ai float64x2_t vmulq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + __ret = __p0 * __p1; return __ret; } #else -__ai float64_t vmulxd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1); - return __ret; -} -__ai float64_t __noswap_vmulxd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1); +__ai float64x2_t vmulq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __ret; + __ret = __rev0 * __rev1; + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vmulxs_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) 
__builtin_neon_vmulxs_f32(__p0, __p1); +__ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { + float64x1_t __ret; + __ret = __p0 * __p1; return __ret; } #else -__ai float32_t vmulxs_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1); - return __ret; -} -__ai float32_t __noswap_vmulxs_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1); +__ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { + float64x1_t __ret; + __ret = __p0 * __p1; return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxd_lane_f64(__p0_150, __p1_150, __p2_150) __extension__ ({ \ - float64_t __s0_150 = __p0_150; \ - float64x1_t __s1_150 = __p1_150; \ - float64_t __ret_150; \ - __ret_150 = vmulxd_f64(__s0_150, vget_lane_f64(__s1_150, __p2_150)); \ - __ret_150; \ +#define vmuld_lane_f64(__p0_142, __p1_142, __p2_142) __extension__ ({ \ + float64_t __s0_142 = __p0_142; \ + float64x1_t __s1_142 = __p1_142; \ + float64_t __ret_142; \ + __ret_142 = __s0_142 * vget_lane_f64(__s1_142, __p2_142); \ + __ret_142; \ }) #else -#define vmulxd_lane_f64(__p0_151, __p1_151, __p2_151) __extension__ ({ \ - float64_t __s0_151 = __p0_151; \ - float64x1_t __s1_151 = __p1_151; \ - float64_t __ret_151; \ - __ret_151 = __noswap_vmulxd_f64(__s0_151, __noswap_vget_lane_f64(__s1_151, __p2_151)); \ - __ret_151; \ +#define vmuld_lane_f64(__p0_143, __p1_143, __p2_143) __extension__ ({ \ + float64_t __s0_143 = __p0_143; \ + float64x1_t __s1_143 = __p1_143; \ + float64_t __ret_143; \ + __ret_143 = __s0_143 * __noswap_vget_lane_f64(__s1_143, __p2_143); \ + __ret_143; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxs_lane_f32(__p0_152, __p1_152, __p2_152) __extension__ ({ \ - float32_t __s0_152 = __p0_152; \ - float32x2_t __s1_152 = __p1_152; \ - float32_t __ret_152; \ - __ret_152 = vmulxs_f32(__s0_152, vget_lane_f32(__s1_152, __p2_152)); \ - __ret_152; \ +#define vmuls_lane_f32(__p0_144, __p1_144, __p2_144) __extension__ ({ \ + float32_t __s0_144 = __p0_144; \ + float32x2_t __s1_144 = __p1_144; \ + float32_t __ret_144; \ + __ret_144 = __s0_144 * vget_lane_f32(__s1_144, __p2_144); \ + __ret_144; \ }) #else -#define vmulxs_lane_f32(__p0_153, __p1_153, __p2_153) __extension__ ({ \ - float32_t __s0_153 = __p0_153; \ - float32x2_t __s1_153 = __p1_153; \ - float32x2_t __rev1_153; __rev1_153 = __builtin_shufflevector(__s1_153, __s1_153, 1, 0); \ - float32_t __ret_153; \ - __ret_153 = __noswap_vmulxs_f32(__s0_153, __noswap_vget_lane_f32(__rev1_153, __p2_153)); \ - __ret_153; \ +#define vmuls_lane_f32(__p0_145, __p1_145, __p2_145) __extension__ ({ \ + float32_t __s0_145 = __p0_145; \ + float32x2_t __s1_145 = __p1_145; \ + float32x2_t __rev1_145; __rev1_145 = __builtin_shufflevector(__s1_145, __s1_145, 1, 0); \ + float32_t __ret_145; \ + __ret_145 = __s0_145 * __noswap_vget_lane_f32(__rev1_145, __p2_145); \ + __ret_145; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ +#define vmul_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ + __ret; \ +}) +#else +#define vmul_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ + 
__ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x2_t __ret; \ - __ret = vmulxq_f64(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ __ret; \ }) #else -#define vmulxq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ +#define vmulq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float64x2_t __ret; \ - __ret = __noswap_vmulxq_f64(__rev0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = __rev0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x4_t __ret; \ - __ret = vmulxq_f32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ - __ret; \ +#define vmuld_laneq_f64(__p0_146, __p1_146, __p2_146) __extension__ ({ \ + float64_t __s0_146 = __p0_146; \ + float64x2_t __s1_146 = __p1_146; \ + float64_t __ret_146; \ + __ret_146 = __s0_146 * vgetq_lane_f64(__s1_146, __p2_146); \ + __ret_146; \ }) #else -#define vmulxq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x4_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float32x4_t __ret; \ - __ret = __noswap_vmulxq_f32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ +#define vmuld_laneq_f64(__p0_147, __p1_147, __p2_147) __extension__ ({ \ + float64_t __s0_147 = __p0_147; \ + float64x2_t __s1_147 = __p1_147; \ + float64x2_t __rev1_147; __rev1_147 = __builtin_shufflevector(__s1_147, __s1_147, 1, 0); \ + float64_t __ret_147; \ + __ret_147 = __s0_147 * __noswap_vgetq_lane_f64(__rev1_147, __p2_147); \ + __ret_147; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulx_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x2_t __ret; \ - __ret = vmulx_f32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ +#define vmuls_laneq_f32(__p0_148, __p1_148, __p2_148) __extension__ ({ \ + float32_t __s0_148 = __p0_148; \ + float32x4_t __s1_148 = __p1_148; \ + float32_t __ret_148; \ + __ret_148 = __s0_148 * vgetq_lane_f32(__s1_148, __p2_148); \ + __ret_148; \ +}) +#else +#define vmuls_laneq_f32(__p0_149, __p1_149, __p2_149) __extension__ ({ \ + float32_t __s0_149 = __p0_149; \ + float32x4_t __s1_149 = __p1_149; \ + float32x4_t __rev1_149; __rev1_149 = __builtin_shufflevector(__s1_149, __s1_149, 3, 2, 1, 0); \ + float32_t __ret_149; \ + __ret_149 = __s0_149 * __noswap_vgetq_lane_f32(__rev1_149, __p2_149); \ + __ret_149; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 10); \ __ret; \ }) #else -#define vmulx_lane_f32(__p0, __p1, __p2) __extension__ ({ \ - float32x2_t __s0 = __p0; \ - float32x2_t __s1 = __p1; \ - float32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 1, 0); \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float32x2_t __ret; \ - __ret = __noswap_vmulx_f32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ +#define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__rev1, __p2, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxd_laneq_f64(__p0_154, __p1_154, __p2_154) __extension__ ({ \ - float64_t __s0_154 = __p0_154; \ - float64x2_t __s1_154 = __p1_154; \ - float64_t __ret_154; \ - __ret_154 = vmulxd_f64(__s0_154, vgetq_lane_f64(__s1_154, __p2_154)); \ - __ret_154; \ +#define vmulq_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x4_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret; \ }) #else -#define vmulxd_laneq_f64(__p0_155, __p1_155, __p2_155) __extension__ ({ \ - float64_t __s0_155 = __p0_155; \ - float64x2_t __s1_155 = __p1_155; \ - float64x2_t __rev1_155; __rev1_155 = __builtin_shufflevector(__s1_155, __s1_155, 1, 0); \ - float64_t __ret_155; \ - __ret_155 = __noswap_vmulxd_f64(__s0_155, __noswap_vgetq_lane_f64(__rev1_155, __p2_155)); \ - __ret_155; \ +#define vmulq_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint32x4_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxs_laneq_f32(__p0_156, __p1_156, __p2_156) __extension__ ({ \ - float32_t __s0_156 = __p0_156; \ - float32x4_t __s1_156 = __p1_156; \ - float32_t __ret_156; \ - __ret_156 = vmulxs_f32(__s0_156, vgetq_lane_f32(__s1_156, __p2_156)); \ - __ret_156; \ +#define vmulq_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x8_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret; \ }) #else -#define vmulxs_laneq_f32(__p0_157, __p1_157, __p2_157) __extension__ ({ \ - float32_t __s0_157 = __p0_157; \ - float32x4_t __s1_157 = __p1_157; \ - float32x4_t __rev1_157; __rev1_157 = __builtin_shufflevector(__s1_157, __s1_157, 3, 2, 1, 0); \ - float32_t __ret_157; \ - __ret_157 = __noswap_vmulxs_f32(__s0_157, __noswap_vgetq_lane_f32(__rev1_157, __p2_157)); \ - __ret_157; \ +#define vmulq_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ 
+#define vmulq_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __ret; \ - __ret = vmulxq_f64(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ __ret; \ }) #else -#define vmulxq_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ +#define vmulq_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ float64x2_t __ret; \ - __ret = __noswap_vmulxq_f64(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ +#define vmulq_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __ret; \ - __ret = vmulxq_f32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ __ret; \ }) #else -#define vmulxq_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ +#define vmulq_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ float32x4_t __ret; \ - __ret = __noswap_vmulxq_f32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ +#define vmulq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret; \ +}) +#else +#define vmulq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret; \ +}) +#else +#define vmulq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + 
+#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x2_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ + __ret; \ +}) +#else +#define vmul_laneq_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x2_t __s0 = __p0; \ + uint32x4_t __s1 = __p1; \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + uint32x2_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x4_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret; \ +}) +#else +#define vmul_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x4_t __s0 = __p0; \ + uint16x8_t __s1 = __p1; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x2_t __ret; \ - __ret = vmulx_f32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ __ret; \ }) #else -#define vmulx_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ +#define vmul_laneq_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ float32x2_t __ret; \ - __ret = __noswap_vmulx_f32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vnegq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = -__p0; - return __ret; -} +#define vmul_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2); \ + __ret; \ +}) #else -__ai float64x2_t vnegq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vmul_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x2_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vnegq_s64(int64x2_t __p0) { - int64x2_t __ret; - __ret = -__p0; - return __ret; -} +#define 
vmul_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __ret; \ + __ret = __s0 * __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2); \ + __ret; \ +}) #else -__ai int64x2_t vnegq_s64(int64x2_t __p0) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __ret; - __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vmul_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = __rev0 * __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vneg_f64(float64x1_t __p0) { +__ai float64x1_t vmul_n_f64(float64x1_t __p0, float64_t __p1) { float64x1_t __ret; - __ret = -__p0; + __ret = (float64x1_t) __builtin_neon_vmul_n_f64((int8x8_t)__p0, __p1); return __ret; } #else -__ai float64x1_t vneg_f64(float64x1_t __p0) { +__ai float64x1_t vmul_n_f64(float64x1_t __p0, float64_t __p1) { float64x1_t __ret; - __ret = -__p0; + __ret = (float64x1_t) __builtin_neon_vmul_n_f64((int8x8_t)__p0, __p1); return __ret; } #endif -#ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vneg_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = -__p0; +#ifdef __LITTLE_ENDIAN__ +__ai float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { + float64x2_t __ret; + __ret = __p0 * (float64x2_t) {__p1, __p1}; return __ret; } #else -__ai int64x1_t vneg_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = -__p0; +__ai float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __ret; + __ret = __rev0 * (float64x2_t) {__p1, __p1}; + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vnegd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vnegd_s64(__p0); +__ai poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { + poly128_t __ret; + __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); return __ret; } #else -__ai int64_t vnegd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vnegd_s64(__p0); +__ai poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { + poly128_t __ret; + __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); + return __ret; +} +__ai poly128_t __noswap_vmull_p64(poly64_t __p0, poly64_t __p1) { + poly128_t __ret; + __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); +__ai poly16x8_t vmull_high_p8(poly8x16_t __p0, poly8x16_t __p1) { + poly16x8_t __ret; + __ret = vmull_p8(vget_high_p8(__p0), vget_high_p8(__p1)); return __ret; } #else -__ai uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) 
__builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai poly16x8_t vmull_high_p8(poly8x16_t __p0, poly8x16_t __p1) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __ret; + __ret = __noswap_vmull_p8(__noswap_vget_high_p8(__rev0), __noswap_vget_high_p8(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vpaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); +__ai uint16x8_t vmull_high_u8(uint8x16_t __p0, uint8x16_t __p1) { + uint16x8_t __ret; + __ret = vmull_u8(vget_high_u8(__p0), vget_high_u8(__p1)); return __ret; } #else -__ai uint32x4_t vpaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai uint16x8_t vmull_high_u8(uint8x16_t __p0, uint8x16_t __p1) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = __noswap_vmull_u8(__noswap_vget_high_u8(__rev0), __noswap_vget_high_u8(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vpaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { +__ai uint64x2_t vmull_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = vmull_u32(vget_high_u32(__p0), vget_high_u32(__p1)); return __ret; } #else -__ai uint64x2_t vpaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); +__ai uint64x2_t vmull_high_u32(uint32x4_t __p0, uint32x4_t __p1) { + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); + __ret = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0), __noswap_vget_high_u32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vpaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); +__ai uint32x4_t vmull_high_u16(uint16x8_t __p0, uint16x8_t __p1) { + uint32x4_t __ret; + __ret = vmull_u16(vget_high_u16(__p0), vget_high_u16(__p1)); return __ret; } #else -__ai uint16x8_t vpaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { +__ai uint32x4_t vmull_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __ret; + __ret = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0), __noswap_vget_high_u16(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vpaddq_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); +__ai int16x8_t vmull_high_s8(int8x16_t __p0, int8x16_t __p1) { + int16x8_t __ret; + __ret = vmull_s8(vget_high_s8(__p0), vget_high_s8(__p1)); return __ret; } #else -__ai int8x16_t vpaddq_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int16x8_t vmull_high_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret; + __ret = __noswap_vmull_s8(__noswap_vget_high_s8(__rev0), __noswap_vget_high_s8(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vpaddq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); +__ai int64x2_t vmull_high_s32(int32x4_t __p0, int32x4_t __p1) { + int64x2_t __ret; + __ret = vmull_s32(vget_high_s32(__p0), vget_high_s32(__p1)); return __ret; } #else -__ai float64x2_t vpaddq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); +__ai int64x2_t vmull_high_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __ret; + __ret = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vpaddq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); +__ai int32x4_t vmull_high_s16(int16x8_t __p0, int16x8_t __p1) { + int32x4_t __ret; + __ret = vmull_s16(vget_high_s16(__p0), vget_high_s16(__p1)); return __ret; } #else -__ai float32x4_t vpaddq_f32(float32x4_t __p0, float32x4_t __p1) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); +__ai int32x4_t vmull_high_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vpaddq_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); +__ai poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { + poly128_t __ret; + __ret = vmull_p64((poly64_t)(vget_high_p64(__p0)), (poly64_t)(vget_high_p64(__p1))); return __ret; } #else -__ai int32x4_t vpaddq_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + poly128_t __ret; + __ret = __noswap_vmull_p64((poly64_t)(__noswap_vget_high_p64(__rev0)), (poly64_t)(__noswap_vget_high_p64(__rev1))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vpaddq_s64(int64x2_t __p0, int64x2_t __p1) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); - return __ret; -} +#define vmull_high_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ + uint64x2_t __ret; \ + __ret = vmull_u32(vget_high_u32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret; \ +}) #else -__ai int64x2_t vpaddq_s64(int64x2_t __p0, int64x2_t __p1) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vmull_high_lane_u32(__p0, __p1, __p2) __extension__ ({ \ + uint32x4_t __s0 = __p0; \ + uint32x2_t __s1 = __p1; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + uint64x2_t __ret; \ + __ret = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); - return __ret; -} +#define vmull_high_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __s0 = __p0; \ + uint16x4_t __s1 = __p1; \ + uint32x4_t __ret; \ + __ret = vmull_u16(vget_high_u16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret; \ +}) #else -__ai int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 
6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vmull_high_lane_u16(__p0, __p1, __p2) __extension__ ({ \
+  uint16x8_t __s0 = __p0; \
+  uint16x4_t __s1 = __p1; \
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  uint32x4_t __ret; \
+  __ret = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vpaddd_u64(uint64x2_t __p0) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vpaddd_u64((int8x16_t)__p0);
-  return __ret;
-}
+#define vmull_high_lane_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int64x2_t __ret; \
+  __ret = vmull_s32(vget_high_s32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai uint64_t vpaddd_u64(uint64x2_t __p0) {
-  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vpaddd_u64((int8x16_t)__rev0);
-  return __ret;
-}
+#define vmull_high_lane_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x2_t __s1 = __p1; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  int64x2_t __ret; \
+  __ret = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64_t vpaddd_f64(float64x2_t __p0) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpaddd_f64((int8x16_t)__p0);
-  return __ret;
-}
+#define vmull_high_lane_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int32x4_t __ret; \
+  __ret = vmull_s16(vget_high_s16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai float64_t vpaddd_f64(float64x2_t __p0) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpaddd_f64((int8x16_t)__rev0);
-  return __ret;
-}
+#define vmull_high_lane_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x4_t __s1 = __p1; \
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+  __ret = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64_t vpaddd_s64(int64x2_t __p0) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vpaddd_s64((int8x16_t)__p0);
-  return __ret;
-}
+#define vmull_high_laneq_u32(__p0, __p1, __p2) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
+  uint64x2_t __ret; \
+  __ret = vmull_u32(vget_high_u32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai int64_t vpaddd_s64(int64x2_t __p0) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vpaddd_s64((int8x16_t)__rev0);
-  return __ret;
-}
+#define vmull_high_laneq_u32(__p0, __p1, __p2) __extension__ ({ \
+  uint32x4_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  uint64x2_t __ret; \
+  __ret = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32_t vpadds_f32(float32x2_t __p0) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpadds_f32((int8x8_t)__p0);
-  return __ret;
-}
+#define vmull_high_laneq_u16(__p0, __p1, __p2) __extension__ ({ \
+  uint16x8_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
+  uint32x4_t __ret; \
+  __ret = vmull_u16(vget_high_u16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai float32_t vpadds_f32(float32x2_t __p0) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpadds_f32((int8x8_t)__rev0);
-  return __ret;
-}
+#define vmull_high_laneq_u16(__p0, __p1, __p2) __extension__ ({ \
+  uint16x8_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint32x4_t __ret; \
+  __ret = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
+#define vmull_high_laneq_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int64x2_t __ret; \
+  __ret = vmull_s32(vget_high_s32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
+#else
+#define vmull_high_laneq_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x4_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int64x2_t __ret; \
+  __ret = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmull_high_laneq_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int32x4_t __ret; \
+  __ret = vmull_s16(vget_high_s16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
+  __ret; \
+})
+#else
+#define vmull_high_laneq_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x8_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+  __ret = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint64x2_t vmull_high_n_u32(uint32x4_t __p0, uint32_t __p1) {
+  uint64x2_t __ret;
+  __ret = vmull_n_u32(vget_high_u32(__p0), __p1);
   return __ret;
 }
 #else
-__ai uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) {
-  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint64x2_t vmull_high_n_u32(uint32x4_t __p0, uint32_t __p1) {
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint64x2_t __ret;
+  __ret = __noswap_vmull_n_u32(__noswap_vget_high_u32(__rev0), __p1);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+__ai uint32x4_t vmull_high_n_u16(uint16x8_t __p0, uint16_t __p1) {
   uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
+  __ret = vmull_n_u16(vget_high_u16(__p0), __p1);
   return __ret;
 }
 #else
-__ai uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) {
-  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+__ai uint32x4_t vmull_high_n_u16(uint16x8_t __p0, uint16_t __p1) {
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+  __ret = __noswap_vmull_n_u16(__noswap_vget_high_u16(__rev0), __p1);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
+__ai int64x2_t vmull_high_n_s32(int32x4_t __p0, int32_t __p1) {
+  int64x2_t __ret;
+  __ret = vmull_n_s32(vget_high_s32(__p0), __p1);
   return __ret;
 }
 #else
-__ai uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) {
-  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64x2_t vmull_high_n_s32(int32x4_t __p0, int32_t __p1) {
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int64x2_t __ret;
+  __ret = __noswap_vmull_n_s32(__noswap_vget_high_s32(__rev0), __p1);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16_t __p1) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
+__ai int32x4_t vmull_high_n_s16(int16x8_t __p0, int16_t __p1) {
+  int32x4_t __ret;
+  __ret = vmull_n_s16(vget_high_s16(__p0), __p1);
   return __ret;
 }
 #else
-__ai int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16_t __p1) {
-  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32x4_t vmull_high_n_s16(int16x8_t __p0, int16_t __p1) {
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int32x4_t __ret;
+  __ret = __noswap_vmull_n_s16(__noswap_vget_high_s16(__rev0), __p1);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64x2_t vpmaxq_f64(float64x2_t __p0, float64x2_t __p1) {
+#define vmull_laneq_u32(__p0, __p1, __p2) __extension__ ({ \
+  uint32x2_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
+  uint64x2_t __ret; \
+  __ret = vmull_u32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
+#else
+#define vmull_laneq_u32(__p0, __p1, __p2) __extension__ ({ \
+  uint32x2_t __s0 = __p0; \
+  uint32x4_t __s1 = __p1; \
+  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  uint64x2_t __ret; \
+  __ret = __noswap_vmull_u32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmull_laneq_u16(__p0, __p1, __p2) __extension__ ({ \
+  uint16x4_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
+  uint32x4_t __ret; \
+  __ret = vmull_u16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
+  __ret; \
+})
+#else
+#define vmull_laneq_u16(__p0, __p1, __p2) __extension__ ({ \
+  uint16x4_t __s0 = __p0; \
+  uint16x8_t __s1 = __p1; \
+  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint32x4_t __ret; \
+  __ret = __noswap_vmull_u16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int64x2_t __ret; \
+  __ret = vmull_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
+#else
+#define vmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \
+  int32x2_t __s0 = __p0; \
+  int32x4_t __s1 = __p1; \
+  int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  int64x2_t __ret; \
+  __ret = __noswap_vmull_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vmull_laneq_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int32x4_t __ret; \
+  __ret = vmull_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
+  __ret; \
+})
+#else
+#define vmull_laneq_s16(__p0, __p1, __p2) __extension__ ({ \
+  int16x4_t __s0 = __p0; \
+  int16x8_t __s1 = __p1; \
+  int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int32x4_t __ret; \
+  __ret = __noswap_vmull_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float64x2_t vmulxq_f64(float64x2_t __p0, float64x2_t __p1) {
   float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
+  __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
   return __ret;
 }
 #else
-__ai float64x2_t vpmaxq_f64(float64x2_t __p0, float64x2_t __p1) {
+__ai float64x2_t vmulxq_f64(float64x2_t __p0, float64x2_t __p1) {
   float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
+  __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
+__ai float64x2_t __noswap_vmulxq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32x4_t vpmaxq_f32(float32x4_t __p0, float32x4_t __p1) {
+__ai float32x4_t vmulxq_f32(float32x4_t __p0, float32x4_t __p1) {
   float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
+  __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
   return __ret;
 }
 #else
-__ai float32x4_t vpmaxq_f32(float32x4_t __p0, float32x4_t __p1) {
+__ai float32x4_t vmulxq_f32(float32x4_t __p0, float32x4_t __p1) {
   float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
+  __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int32x4_t vpmaxq_s32(int32x4_t __p0, int32x4_t __p1) {
-  int32x4_t __ret;
-  __ret = (int32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
-  return __ret;
-}
-#else
-__ai int32x4_t vpmaxq_s32(int32x4_t __p0, int32x4_t __p1) {
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  int32x4_t __ret;
-  __ret = (int32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float32x4_t __noswap_vmulxq_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
+__ai float64x1_t vmulx_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
 #else
-__ai int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) {
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai float64x1_t vmulx_f64(float64x1_t __p0, float64x1_t __p1) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64_t vpmaxqd_f64(float64x2_t __p0) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpmaxqd_f64((int8x16_t)__p0);
+__ai float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
   return __ret;
 }
 #else
-__ai float64_t vpmaxqd_f64(float64x2_t __p0) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpmaxqd_f64((int8x16_t)__rev0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float32_t vpmaxs_f32(float32x2_t __p0) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpmaxs_f32((int8x8_t)__p0);
+__ai float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
-#else
-__ai float32_t vpmaxs_f32(float32x2_t __p0) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpmaxs_f32((int8x8_t)__rev0);
+__ai float32x2_t __noswap_vmulx_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64x2_t vpmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
+__ai float64_t vmulxd_f64(float64_t __p0, float64_t __p1) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1);
   return __ret;
 }
 #else
-__ai float64x2_t vpmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float32x4_t vpmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) {
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
+__ai float64_t vmulxd_f64(float64_t __p0, float64_t __p1) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1);
   return __ret;
 }
-#else
-__ai float32x4_t vpmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) {
-  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64_t __noswap_vmulxd_f64(float64_t __p0, float64_t __p1) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) {
-  float32x2_t __ret;
-  __ret = (float32x2_t) __builtin_neon_vpmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
+__ai float32_t vmulxs_f32(float32_t __p0, float32_t __p1) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1);
   return __ret;
 }
 #else
-__ai float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  float32x2_t __ret;
-  __ret = (float32x2_t) __builtin_neon_vpmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai float32_t vmulxs_f32(float32_t __p0, float32_t __p1) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1);
+  return __ret;
+}
+__ai float32_t __noswap_vmulxs_f32(float32_t __p0, float32_t __p1) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64_t vpmaxnmqd_f64(float64x2_t __p0) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64((int8x16_t)__p0);
-  return __ret;
-}
+#define vmulxd_lane_f64(__p0_150, __p1_150, __p2_150) __extension__ ({ \
+  float64_t __s0_150 = __p0_150; \
+  float64x1_t __s1_150 = __p1_150; \
+  float64_t __ret_150; \
+  __ret_150 = vmulxd_f64(__s0_150, vget_lane_f64(__s1_150, __p2_150)); \
+  __ret_150; \
+})
 #else
-__ai float64_t vpmaxnmqd_f64(float64x2_t __p0) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64((int8x16_t)__rev0);
-  return __ret;
-}
+#define vmulxd_lane_f64(__p0_151, __p1_151, __p2_151) __extension__ ({ \
+  float64_t __s0_151 = __p0_151; \
+  float64x1_t __s1_151 = __p1_151; \
+  float64_t __ret_151; \
+  __ret_151 = __noswap_vmulxd_f64(__s0_151, __noswap_vget_lane_f64(__s1_151, __p2_151)); \
+  __ret_151; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32_t vpmaxnms_f32(float32x2_t __p0) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpmaxnms_f32((int8x8_t)__p0);
-  return __ret;
-}
+#define vmulxs_lane_f32(__p0_152, __p1_152, __p2_152) __extension__ ({ \
+  float32_t __s0_152 = __p0_152; \
+  float32x2_t __s1_152 = __p1_152; \
+  float32_t __ret_152; \
+  __ret_152 = vmulxs_f32(__s0_152, vget_lane_f32(__s1_152, __p2_152)); \
+  __ret_152; \
+})
 #else
-__ai float32_t vpmaxnms_f32(float32x2_t __p0) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpmaxnms_f32((int8x8_t)__rev0);
-  return __ret;
-}
+#define vmulxs_lane_f32(__p0_153, __p1_153, __p2_153) __extension__ ({ \
+  float32_t __s0_153 = __p0_153; \
+  float32x2_t __s1_153 = __p1_153; \
+  float32x2_t __rev1_153; __rev1_153 = __builtin_shufflevector(__s1_153, __s1_153, 1, 0); \
+  float32_t __ret_153; \
+  __ret_153 = __noswap_vmulxs_f32(__s0_153, __noswap_vget_lane_f32(__rev1_153, __p2_153)); \
+  __ret_153; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vpminq_u8(uint8x16_t __p0, uint8x16_t __p1) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
-  return __ret;
-}
+#define vmulxq_lane_f64(__p0, __p1, __p2) __extension__ ({ \
+  float64x2_t __s0 = __p0; \
+  float64x1_t __s1 = __p1; \
+  float64x2_t __ret; \
+  __ret = vmulxq_f64(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai uint8x16_t vpminq_u8(uint8x16_t __p0, uint8x16_t __p1) {
-  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vmulxq_lane_f64(__p0, __p1, __p2) __extension__ ({ \
+  float64x2_t __s0 = __p0; \
+  float64x1_t __s1 = __p1; \
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  float64x2_t __ret; \
+  __ret = __noswap_vmulxq_f64(__rev0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vpminq_u32(uint32x4_t __p0, uint32x4_t __p1) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
-  return __ret;
-}
+#define vmulxq_lane_f32(__p0, __p1, __p2) __extension__ ({ \
+  float32x4_t __s0 = __p0; \
+  float32x2_t __s1 = __p1; \
+  float32x4_t __ret; \
+  __ret = vmulxq_f32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai uint32x4_t vpminq_u32(uint32x4_t __p0, uint32x4_t __p1) {
-  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint32x4_t __ret;
-  __ret = (uint32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
+#define vmulxq_lane_f32(__p0, __p1, __p2) __extension__ ({ \
+  float32x4_t __s0 = __p0; \
+  float32x2_t __s1 = __p1; \
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  float32x4_t __ret; \
+  __ret = __noswap_vmulxq_f32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vpminq_u16(uint16x8_t __p0, uint16x8_t __p1) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
-  return __ret;
-}
+#define vmulx_lane_f32(__p0, __p1, __p2) __extension__ ({ \
+  float32x2_t __s0 = __p0; \
+  float32x2_t __s1 = __p1; \
+  float32x2_t __ret; \
+  __ret = vmulx_f32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai uint16x8_t vpminq_u16(uint16x8_t __p0, uint16x8_t __p1) {
-  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = (uint16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vmulx_lane_f32(__p0, __p1, __p2) __extension__ ({ \
+  float32x2_t __s0 = __p0; \
+  float32x2_t __s1 = __p1; \
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  float32x2_t __ret; \
+  __ret = __noswap_vmulx_f32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vpminq_s8(int8x16_t __p0, int8x16_t __p1) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
-  return __ret;
-}
+#define vmulxd_laneq_f64(__p0_154, __p1_154, __p2_154) __extension__ ({ \
+  float64_t __s0_154 = __p0_154; \
+  float64x2_t __s1_154 = __p1_154; \
+  float64_t __ret_154; \
+  __ret_154 = vmulxd_f64(__s0_154, vgetq_lane_f64(__s1_154, __p2_154)); \
+  __ret_154; \
+})
 #else
-__ai int8x16_t vpminq_s8(int8x16_t __p0, int8x16_t __p1) {
-  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vmulxd_laneq_f64(__p0_155, __p1_155, __p2_155) __extension__ ({ \
+  float64_t __s0_155 = __p0_155; \
+  float64x2_t __s1_155 = __p1_155; \
+  float64x2_t __rev1_155; __rev1_155 = __builtin_shufflevector(__s1_155, __s1_155, 1, 0); \
+  float64_t __ret_155; \
+  __ret_155 = __noswap_vmulxd_f64(__s0_155, __noswap_vgetq_lane_f64(__rev1_155, __p2_155)); \
+  __ret_155; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64x2_t vpminq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
-  return __ret;
-}
+#define vmulxs_laneq_f32(__p0_156, __p1_156, __p2_156) __extension__ ({ \
+  float32_t __s0_156 = __p0_156; \
+  float32x4_t __s1_156 = __p1_156; \
+  float32_t __ret_156; \
+  __ret_156 = vmulxs_f32(__s0_156, vgetq_lane_f32(__s1_156, __p2_156)); \
+  __ret_156; \
+})
 #else
-__ai float64x2_t vpminq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
+#define vmulxs_laneq_f32(__p0_157, __p1_157, __p2_157) __extension__ ({ \
+  float32_t __s0_157 = __p0_157; \
+  float32x4_t __s1_157 = __p1_157; \
+  float32x4_t __rev1_157; __rev1_157 = __builtin_shufflevector(__s1_157, __s1_157, 3, 2, 1, 0); \
+  float32_t __ret_157; \
+  __ret_157 = __noswap_vmulxs_f32(__s0_157, __noswap_vgetq_lane_f32(__rev1_157, __p2_157)); \
+  __ret_157; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32x4_t vpminq_f32(float32x4_t __p0, float32x4_t __p1) {
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
-  return __ret;
-}
+#define vmulxq_laneq_f64(__p0, __p1, __p2) __extension__ ({ \
+  float64x2_t __s0 = __p0; \
+  float64x2_t __s1 = __p1; \
+  float64x2_t __ret; \
+  __ret = vmulxq_f64(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai float32x4_t vpminq_f32(float32x4_t __p0, float32x4_t __p1) {
-  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
+#define vmulxq_laneq_f64(__p0, __p1, __p2) __extension__ ({ \
+  float64x2_t __s0 = __p0; \
+  float64x2_t __s1 = __p1; \
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  float64x2_t __ret; \
+  __ret = __noswap_vmulxq_f64(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32x4_t vpminq_s32(int32x4_t __p0, int32x4_t __p1) {
-  int32x4_t __ret;
-  __ret = (int32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
-  return __ret;
-}
+#define vmulxq_laneq_f32(__p0, __p1, __p2) __extension__ ({ \
+  float32x4_t __s0 = __p0; \
+  float32x4_t __s1 = __p1; \
+  float32x4_t __ret; \
+  __ret = vmulxq_f32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai int32x4_t vpminq_s32(int32x4_t __p0, int32x4_t __p1) {
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  int32x4_t __ret;
-  __ret = (int32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
+#define vmulxq_laneq_f32(__p0, __p1, __p2) __extension__ ({ \
+  float32x4_t __s0 = __p0; \
+  float32x4_t __s1 = __p1; \
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  float32x4_t __ret; \
+  __ret = __noswap_vmulxq_f32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) {
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
-  return __ret;
-}
+#define vmulx_laneq_f32(__p0, __p1, __p2) __extension__ ({ \
+  float32x2_t __s0 = __p0; \
+  float32x4_t __s1 = __p1; \
+  float32x2_t __ret; \
+  __ret = vmulx_f32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \
+  __ret; \
+})
 #else
-__ai int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) {
-  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = (int16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vmulx_laneq_f32(__p0, __p1, __p2) __extension__ ({ \
+  float32x2_t __s0 = __p0; \
+  float32x4_t __s1 = __p1; \
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+  float32x2_t __ret; \
+  __ret = __noswap_vmulx_f32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
+})
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64_t vpminqd_f64(float64x2_t __p0) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpminqd_f64((int8x16_t)__p0);
+__ai float64x2_t vnegq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = -__p0;
   return __ret;
 }
 #else
-__ai float64_t vpminqd_f64(float64x2_t __p0) {
+__ai float64x2_t vnegq_f64(float64x2_t __p0) {
   float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpminqd_f64((int8x16_t)__rev0);
+  float64x2_t __ret;
+  __ret = -__rev0;
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32_t vpmins_f32(float32x2_t __p0) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpmins_f32((int8x8_t)__p0);
+__ai int64x2_t vnegq_s64(int64x2_t __p0) {
+  int64x2_t __ret;
+  __ret = -__p0;
   return __ret;
 }
 #else
-__ai float32_t vpmins_f32(float32x2_t __p0) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpmins_f32((int8x8_t)__rev0);
+__ai int64x2_t vnegq_s64(int64x2_t __p0) {
+  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int64x2_t __ret;
+  __ret = -__rev0;
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64x2_t vpminnmq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
+__ai float64x1_t vneg_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = -__p0;
   return __ret;
 }
 #else
-__ai float64x2_t vpminnmq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai float64x1_t vneg_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = -__p0;
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32x4_t vpminnmq_f32(float32x4_t __p0, float32x4_t __p1) {
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
+__ai int64x1_t vneg_s64(int64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = -__p0;
   return __ret;
 }
 #else
-__ai float32x4_t vpminnmq_f32(float32x4_t __p0, float32x4_t __p1) {
-  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  float32x4_t __ret;
-  __ret = (float32x4_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai int64x1_t vneg_s64(int64x1_t __p0) {
+  int64x1_t __ret;
+  __ret = -__p0;
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) {
-  float32x2_t __ret;
-  __ret = (float32x2_t) __builtin_neon_vpminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
+__ai int64_t vnegd_s64(int64_t __p0) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vnegd_s64(__p0);
   return __ret;
 }
 #else
-__ai float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  float32x2_t __ret;
-  __ret = (float32x2_t) __builtin_neon_vpminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int64_t vnegd_s64(int64_t __p0) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vnegd_s64(__p0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float64_t vpminnmqd_f64(float64x2_t __p0) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpminnmqd_f64((int8x16_t)__p0);
+__ai uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+  uint8x16_t __ret;
+  __ret = (uint8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
   return __ret;
 }
 #else
-__ai float64_t vpminnmqd_f64(float64x2_t __p0) {
-  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vpminnmqd_f64((int8x16_t)__rev0);
+__ai uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __ret;
+  __ret = (uint8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai float32_t vpminnms_f32(float32x2_t __p0) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpminnms_f32((int8x8_t)__p0);
+__ai uint32x4_t vpaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   return __ret;
 }
 #else
-__ai float32_t vpminnms_f32(float32x2_t __p0) {
-  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vpminnms_f32((int8x8_t)__rev0);
+__ai uint32x4_t vpaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x2_t vqabsq_s64(int64x2_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 35);
+__ai uint64x2_t vpaddq_u64(uint64x2_t __p0, uint64x2_t __p1) {
+  uint64x2_t __ret;
+  __ret = (uint64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51);
   return __ret;
 }
 #else
-__ai int64x2_t vqabsq_s64(int64x2_t __p0) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int64x2_t __ret;
-  __ret = (int64x2_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 35);
+__ai uint64x2_t vpaddq_u64(uint64x2_t __p0, uint64x2_t __p1) {
+  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  uint64x2_t __ret;
+  __ret = (uint64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51);
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x1_t vqabs_s64(int64x1_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 3);
+__ai uint16x8_t vpaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
   return __ret;
 }
 #else
-__ai int64x1_t vqabs_s64(int64x1_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 3);
+__ai uint16x8_t vpaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int8_t vqabsb_s8(int8_t __p0) {
-  int8_t __ret;
-  __ret = (int8_t) __builtin_neon_vqabsb_s8(__p0);
+__ai int8x16_t vpaddq_s8(int8x16_t __p0, int8x16_t __p1) {
+  int8x16_t __ret;
+  __ret = (int8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
   return __ret;
 }
 #else
-__ai int8_t vqabsb_s8(int8_t __p0) {
-  int8_t __ret;
-  __ret = (int8_t) __builtin_neon_vqabsb_s8(__p0);
+__ai int8x16_t vpaddq_s8(int8x16_t __p0, int8x16_t __p1) {
+  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8x16_t __ret;
+  __ret = (int8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32_t vqabss_s32(int32_t __p0) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqabss_s32(__p0);
+__ai float64x2_t vpaddq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
   return __ret;
 }
 #else
-__ai int32_t vqabss_s32(int32_t __p0) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqabss_s32(__p0);
+__ai float64x2_t vpaddq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64_t vqabsd_s64(int64_t __p0) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vqabsd_s64(__p0);
+__ai float32x4_t vpaddq_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
   return __ret;
 }
 #else
-__ai int64_t vqabsd_s64(int64_t __p0) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vqabsd_s64(__p0);
+__ai float32x4_t vpaddq_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16_t vqabsh_s16(int16_t __p0) {
-  int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vqabsh_s16(__p0);
+__ai int32x4_t vpaddq_s32(int32x4_t __p0, int32x4_t __p1) {
+  int32x4_t __ret;
+  __ret = (int32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
   return __ret;
 }
 #else
-__ai int16_t vqabsh_s16(int16_t __p0) {
-  int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vqabsh_s16(__p0);
+__ai int32x4_t vpaddq_s32(int32x4_t __p0, int32x4_t __p1) {
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  int32x4_t __ret;
+  __ret = (int32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint8_t vqaddb_u8(uint8_t __p0, uint8_t __p1) {
-  uint8_t __ret;
-  __ret = (uint8_t) __builtin_neon_vqaddb_u8(__p0, __p1);
+__ai int64x2_t vpaddq_s64(int64x2_t __p0, int64x2_t __p1) {
+  int64x2_t __ret;
+  __ret = (int64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35);
   return __ret;
 }
 #else
-__ai uint8_t vqaddb_u8(uint8_t __p0, uint8_t __p1) {
-  uint8_t __ret;
-  __ret = (uint8_t) __builtin_neon_vqaddb_u8(__p0, __p1);
+__ai int64x2_t vpaddq_s64(int64x2_t __p0, int64x2_t __p1) {
+  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  int64x2_t __ret;
+  __ret = (int64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint32_t vqadds_u32(uint32_t __p0, uint32_t __p1) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vqadds_u32(__p0, __p1);
+__ai int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) {
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
   return __ret;
 }
 #else
-__ai uint32_t vqadds_u32(uint32_t __p0, uint32_t __p1) {
-  uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vqadds_u32(__p0, __p1);
+__ai int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) {
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint64_t vqaddd_u64(uint64_t __p0, uint64_t __p1) {
+__ai uint64_t vpaddd_u64(uint64x2_t __p0) {
   uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vqaddd_u64(__p0, __p1);
+  __ret = (uint64_t) __builtin_neon_vpaddd_u64((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint64_t vqaddd_u64(uint64_t __p0, uint64_t __p1) {
+__ai uint64_t vpaddd_u64(uint64x2_t __p0) {
+  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vqaddd_u64(__p0, __p1);
+  __ret = (uint64_t) __builtin_neon_vpaddd_u64((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai uint16_t vqaddh_u16(uint16_t __p0, uint16_t __p1) {
-  uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vqaddh_u16(__p0, __p1);
+__ai float64_t vpaddd_f64(float64x2_t __p0) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vpaddd_f64((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai uint16_t vqaddh_u16(uint16_t __p0, uint16_t __p1) {
-  uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vqaddh_u16(__p0, __p1);
+__ai float64_t vpaddd_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vpaddd_f64((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int8_t vqaddb_s8(int8_t __p0, int8_t __p1) {
-  int8_t __ret;
-  __ret = (int8_t) __builtin_neon_vqaddb_s8(__p0, __p1);
+__ai int64_t vpaddd_s64(int64x2_t __p0) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vpaddd_s64((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai int8_t vqaddb_s8(int8_t __p0, int8_t __p1) {
-  int8_t __ret;
-  __ret = (int8_t) __builtin_neon_vqaddb_s8(__p0, __p1);
+__ai int64_t vpaddd_s64(int64x2_t __p0) {
+  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vpaddd_s64((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32_t vqadds_s32(int32_t __p0, int32_t __p1) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1);
+__ai float32_t vpadds_f32(float32x2_t __p0) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vpadds_f32((int8x8_t)__p0);
   return __ret;
 }
 #else
-__ai int32_t vqadds_s32(int32_t __p0, int32_t __p1) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1);
-  return __ret;
-}
-__ai int32_t __noswap_vqadds_s32(int32_t __p0, int32_t __p1) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1);
+__ai float32_t vpadds_f32(float32x2_t __p0) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vpadds_f32((int8x8_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64_t vqaddd_s64(int64_t __p0, int64_t __p1) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vqaddd_s64(__p0, __p1);
+__ai uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+  uint8x16_t __ret;
+  __ret = (uint8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
   return __ret;
 }
 #else
-__ai int64_t vqaddd_s64(int64_t __p0, int64_t __p1) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vqaddd_s64(__p0, __p1);
+__ai uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __ret;
+  __ret = (uint8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int16_t vqaddh_s16(int16_t __p0, int16_t __p1) {
-  int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1);
+__ai uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   return __ret;
 }
 #else
-__ai int16_t vqaddh_s16(int16_t __p0, int16_t __p1) {
-  int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1);
-  return __ret;
-}
-__ai int16_t __noswap_vqaddh_s16(int16_t __p0, int16_t __p1) {
-  int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1);
+__ai uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64_t vqdmlals_s32(int64_t __p0, int32_t __p1, int32_t __p2) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vqdmlals_s32(__p0, __p1, __p2);
+__ai uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
   return __ret;
 }
 #else
-__ai int64_t vqdmlals_s32(int64_t __p0, int32_t __p1, int32_t __p2) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vqdmlals_s32(__p0, __p1, __p2);
+__ai uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32_t vqdmlalh_s16(int32_t __p0, int16_t __p1, int16_t __p2) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2);
+__ai int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16_t __p1) {
+  int8x16_t __ret;
+  __ret = (int8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
   return __ret;
 }
 #else
-__ai int32_t vqdmlalh_s16(int32_t __p0, int16_t __p1, int16_t __p2) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2);
+__ai int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16_t __p1) {
+  int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  int8x16_t __ret;
+  __ret = (int8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x2_t vqdmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) {
-  int64x2_t __ret;
-  __ret = vqdmlal_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2));
+__ai float64x2_t vpmaxq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
   return __ret;
 }
 #else
-__ai int64x2_t vqdmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  int64x2_t __ret;
-  __ret = __noswap_vqdmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2));
+__ai float64x2_t vpmaxq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32x4_t vqdmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) {
-  int32x4_t __ret;
-  __ret = vqdmlal_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2));
+__ai float32x4_t vpmaxq_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
   return __ret;
 }
 #else
-__ai int32x4_t vqdmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) {
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  int32x4_t __ret;
-  __ret = __noswap_vqdmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2));
+__ai float32x4_t vpmaxq_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqdmlal_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64x2_t __s0 = __p0; \
-  int32x4_t __s1 = __p1; \
-  int32x2_t __s2 = __p2; \
-  int64x2_t __ret; \
-  __ret = vqdmlal_s32(__s0, vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \
-  __ret; \
-})
-#else
-#define vqdmlal_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64x2_t __s0 = __p0; \
-  int32x4_t __s1 = __p1; \
-  int32x2_t __s2 = __p2; \
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
-  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \
-  int64x2_t __ret; \
-  __ret = __noswap_vqdmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
-  __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqdmlal_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32x4_t __s0 = __p0; \
-  int16x8_t __s1 = __p1; \
-  int16x4_t __s2 = __p2; \
-  int32x4_t __ret; \
-  __ret = vqdmlal_s16(__s0, vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \
-  __ret; \
-})
-#else
-#define vqdmlal_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32x4_t __s0 = __p0; \
-  int16x8_t __s1 = __p1; \
-  int16x4_t __s2 = __p2; \
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
-  int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
-  int32x4_t __ret; \
-  __ret = __noswap_vqdmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqdmlal_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64x2_t __s0 = __p0; \
-  int32x4_t __s1 = __p1; \
-  int32x4_t __s2 = __p2; \
-  int64x2_t __ret; \
-  __ret = vqdmlal_s32(__s0, vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \
-  __ret; \
-})
-#else
-#define vqdmlal_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64x2_t __s0 = __p0; \
-  int32x4_t __s1 = __p1; \
-  int32x4_t __s2 = __p2; \
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
-  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
-  int64x2_t __ret; \
-  __ret = __noswap_vqdmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
-  __ret; \
-})
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define vqdmlal_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32x4_t __s0 = __p0; \
-  int16x8_t __s1 = __p1; \
-  int16x8_t __s2 = __p2; \
-  int32x4_t __ret; \
-  __ret = vqdmlal_s16(__s0, vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \
-  __ret; \
-})
+__ai int32x4_t vpmaxq_s32(int32x4_t __p0, int32x4_t __p1) {
+  int32x4_t __ret;
+  __ret = (int32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
+  return __ret;
+}
 #else
-#define vqdmlal_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32x4_t __s0 = __p0; \
-  int16x8_t __s1 = __p1; \
-  int16x8_t __s2 = __p2; \
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
-  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
-  int32x4_t __ret; \
-  __ret = __noswap_vqdmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai int32x4_t vpmaxq_s32(int32x4_t __p0, int32x4_t __p1) {
+  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  int32x4_t __ret;
+  __ret = (int32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x2_t vqdmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) {
-  int64x2_t __ret;
-  __ret = vqdmlal_n_s32(__p0, vget_high_s32(__p1), __p2);
+__ai int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) {
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
   return __ret;
 }
 #else
-__ai int64x2_t vqdmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) {
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  int64x2_t __ret;
-  __ret = __noswap_vqdmlal_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) {
+  int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  int16x8_t __ret;
+  __ret = (int16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) {
-  int32x4_t __ret;
-  __ret = vqdmlal_n_s16(__p0, vget_high_s16(__p1), __p2);
+__ai float64_t vpmaxqd_f64(float64x2_t __p0) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vpmaxqd_f64((int8x16_t)__p0);
   return __ret;
 }
 #else
-__ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) {
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int32x4_t __ret;
-  __ret = __noswap_vqdmlal_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2);
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+__ai float64_t vpmaxqd_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vpmaxqd_f64((int8x16_t)__rev0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqdmlals_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64_t __s0 = __p0; \
-  int32_t __s1 = __p1; \
-  int32x2_t __s2 = __p2; \
-  int64_t __ret; \
-  __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, (int8x8_t)__s2, __p3); \
-  __ret; \
-})
+__ai float32_t vpmaxs_f32(float32x2_t __p0) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vpmaxs_f32((int8x8_t)__p0);
+  return __ret;
+}
 #else
-#define vqdmlals_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64_t __s0 = __p0; \
-  int32_t __s1 = __p1; \
-  int32x2_t __s2 = __p2; \
-  int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \
-  int64_t __ret; \
-  __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, (int8x8_t)__rev2, __p3); \
-  __ret; \
-})
+__ai float32_t vpmaxs_f32(float32x2_t __p0) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vpmaxs_f32((int8x8_t)__rev0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqdmlalh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32_t __s0 = __p0; \
-  int16_t __s1 = __p1; \
-  int16x4_t __s2 = __p2; \
-  int32_t __ret; \
-  __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, (int8x8_t)__s2, __p3); \
-  __ret; \
-})
+__ai float64x2_t vpmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
+  return __ret;
+}
 #else
-#define vqdmlalh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32_t __s0 = __p0; \
-  int16_t __s1 = __p1; \
-  int16x4_t __s2 = __p2; \
-  int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
-  int32_t __ret; \
-  __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, (int8x8_t)__rev2, __p3); \
-  __ret; \
-})
+__ai float64x2_t vpmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqdmlals_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64_t __s0 = __p0; \
-  int32_t __s1 = __p1; \
-  int32x4_t __s2 = __p2; \
-  int64_t __ret; \
-  __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, (int8x16_t)__s2, __p3); \
-  __ret; \
-})
+__ai float32x4_t vpmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
+  return __ret;
+}
 #else
-#define vqdmlals_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64_t __s0 = __p0; \
-  int32_t __s1 = __p1; \
-  int32x4_t __s2 = __p2; \
-  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
-  int64_t __ret; \
-  __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, (int8x16_t)__rev2, __p3); \
-  __ret; \
-})
+__ai float32x4_t vpmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqdmlalh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32_t __s0 = __p0; \
-  int16_t __s1 = __p1; \
-  int16x8_t __s2 = __p2; \
-  int32_t __ret; \
-  __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, (int8x16_t)__s2, __p3); \
-  __ret; \
-})
+__ai float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vpmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
+  return __ret;
+}
 #else
-#define vqdmlalh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32_t __s0 = __p0; \
-  int16_t __s1 = __p1; \
-  int16x8_t __s2 = __p2; \
-  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
-  int32_t __ret; \
-  __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, (int8x16_t)__rev2, __p3); \
-  __ret; \
-})
+__ai float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vpmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqdmlal_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64x2_t __s0 = __p0; \
-  int32x2_t __s1 = __p1; \
-  int32x4_t __s2 = __p2; \
-  int64x2_t __ret; \
-  __ret = vqdmlal_s32(__s0, __s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \
-  __ret; \
-})
+__ai float64_t vpmaxnmqd_f64(float64x2_t __p0) {
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64((int8x16_t)__p0);
+  return __ret;
+}
 #else
-#define vqdmlal_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int64x2_t __s0 = __p0; \
-  int32x2_t __s1 = __p1; \
-  int32x4_t __s2 = __p2; \
-  int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
-  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
-  int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
-  int64x2_t __ret; \
-  __ret = __noswap_vqdmlal_s32(__rev0, __rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
-  __ret; \
-})
+__ai float64_t vpmaxnmqd_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64_t __ret;
+  __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64((int8x16_t)__rev0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-#define vqdmlal_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32x4_t __s0 = __p0; \
-  int16x4_t __s1 = __p1; \
-  int16x8_t __s2 = __p2; \
-  int32x4_t __ret; \
-  __ret = vqdmlal_s16(__s0, __s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \
-  __ret; \
-})
+__ai float32_t vpmaxnms_f32(float32x2_t __p0) {
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vpmaxnms_f32((int8x8_t)__p0);
+  return __ret;
+}
 #else
-#define vqdmlal_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \
-  int32x4_t __s0 = __p0; \
-  int16x4_t __s1 = __p1; \
-  int16x8_t __s2 = __p2; \
-  int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
-  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
-  int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
-  int32x4_t __ret; \
-  __ret = __noswap_vqdmlal_s16(__rev0, __rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
-  __ret; \
-})
+__ai float32_t vpmaxnms_f32(float32x2_t __p0) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32_t __ret;
+  __ret = (float32_t) __builtin_neon_vpmaxnms_f32((int8x8_t)__rev0);
+  return __ret;
+}
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64_t vqdmlsls_s32(int64_t __p0, int32_t __p1, int32_t __p2) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2);
+__ai uint8x16_t vpminq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+  uint8x16_t __ret;
+  __ret = (uint8x16_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
   return __ret;
 }
 #else
-__ai int64_t vqdmlsls_s32(int64_t __p0, int32_t __p1, int32_t __p2) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2);
+__ai uint8x16_t vpminq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __ret;
+  __ret = (uint8x16_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int32_t vqdmlslh_s16(int32_t __p0, int16_t __p1, int16_t __p2) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2);
+__ai uint32x4_t vpminq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   return __ret;
 }
 #else
-__ai int32_t vqdmlslh_s16(int32_t __p0, int16_t __p1, int16_t __p2) {
-  int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2);
+__ai uint32x4_t vpminq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   return __ret;
 }
 #endif

 #ifdef __LITTLE_ENDIAN__
-__ai int64x2_t vqdmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) {
-  int64x2_t __ret;
-  __ret = vqdmlsl_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2));
+__ai
uint16x8_t vpminq_u16(uint16x8_t __p0, uint16x8_t __p1) { + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else -__ai int64x2_t vqdmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - int64x2_t __ret; - __ret = __noswap_vqdmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint16x8_t vpminq_u16(uint16x8_t __p0, uint16x8_t __p1) { + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = (uint16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqdmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { - int32x4_t __ret; - __ret = vqdmlsl_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2)); +__ai int8x16_t vpminq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else -__ai int32x4_t vqdmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - int32x4_t __ret; - __ret = __noswap_vqdmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai int8x16_t vpminq_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int64x2_t __ret; \ - __ret = vqdmlsl_s32(__s0, vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) +__ai float64x2_t vpminq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + return __ret; +} #else -#define vqdmlsl_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - int64x2_t __ret; \ - __ret = __noswap_vqdmlsl_s32(__rev0, 
__noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai float64x2_t vpminq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = vqdmlsl_s16(__s0, vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) +__ai float32x4_t vpminq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + return __ret; +} #else -#define vqdmlsl_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqdmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai float32x4_t vpminq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64x2_t __ret; \ - __ret = vqdmlsl_s32(__s0, vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) +__ai int32x4_t vpminq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __ret; + __ret = (int32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + return __ret; +} #else -#define vqdmlsl_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int64x2_t __ret; \ - __ret = __noswap_vqdmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai int32x4_t vpminq_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __ret; + __ret = (int32x4_t) 
__builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = vqdmlsl_s16(__s0, vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) +__ai int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + return __ret; +} #else -#define vqdmlsl_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqdmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret; + __ret = (int16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vqdmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { - int64x2_t __ret; - __ret = vqdmlsl_n_s32(__p0, vget_high_s32(__p1), __p2); +__ai float64_t vpminqd_f64(float64x2_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vpminqd_f64((int8x16_t)__p0); return __ret; } #else -__ai int64x2_t vqdmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int64x2_t __ret; - __ret = __noswap_vqdmlsl_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64_t vpminqd_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64_t __ret; + __ret = (float64_t) __builtin_neon_vpminqd_f64((int8x16_t)__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { - int32x4_t __ret; - __ret = vqdmlsl_n_s16(__p0, vget_high_s16(__p1), __p2); +__ai float32_t vpmins_f32(float32x2_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vpmins_f32((int8x8_t)__p0); return __ret; } #else -__ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int32x4_t __ret; - __ret = __noswap_vqdmlsl_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float32_t vpmins_f32(float32x2_t __p0) { + float32x2_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32_t __ret; + __ret = (float32_t) __builtin_neon_vpmins_f32((int8x8_t)__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsls_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) +__ai float64x2_t vpminnmq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + return __ret; +} #else -#define vqdmlsls_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __s1 = __p1; \ - int32x2_t __s2 = __p2; \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, (int8x8_t)__rev2, __p3); \ - __ret; \ -}) +__ai float64x2_t vpminnmq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlslh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) +__ai float32x4_t vpminnmq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + return __ret; +} #else -#define vqdmlslh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __s1 = __p1; \ - int16x4_t __s2 = __p2; \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, (int8x8_t)__rev2, __p3); \ - __ret; \ -}) +__ai float32x4_t vpminnmq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, (int8x16_t)__s2, __p3); \ - __ret; \ -}) +__ai float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vpminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + return __ret; +} #else -#define vqdmlsls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, (int8x16_t)__rev2, __p3); \ - 
__ret; \ -}) +__ai float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vpminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlslh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, (int8x16_t)__s2, __p3); \ - __ret; \ -}) +__ai float64_t vpminnmqd_f64(float64x2_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vpminnmqd_f64((int8x16_t)__p0); + return __ret; +} #else -#define vqdmlslh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, (int8x16_t)__rev2, __p3); \ - __ret; \ -}) +__ai float64_t vpminnmqd_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64_t __ret; + __ret = (float64_t) __builtin_neon_vpminnmqd_f64((int8x16_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64x2_t __ret; \ - __ret = vqdmlsl_s32(__s0, __s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ - __ret; \ -}) +__ai float32_t vpminnms_f32(float32x2_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vpminnms_f32((int8x8_t)__p0); + return __ret; +} #else -#define vqdmlsl_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ - int64x2_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __s2 = __p2; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - int64x2_t __ret; \ - __ret = __noswap_vqdmlsl_s32(__rev0, __rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai float32_t vpminnms_f32(float32x2_t __p0) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32_t __ret; + __ret = (float32_t) __builtin_neon_vpminnms_f32((int8x8_t)__rev0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32x4_t __ret; \ - __ret = vqdmlsl_s16(__s0, __s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ - __ret; \ -}) +__ai int64x2_t vqabsq_s64(int64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 35); + return __ret; +} #else -#define vqdmlsl_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __s2 = __p2; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int16x8_t 
__rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqdmlsl_s16(__rev0, __rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int64x2_t vqabsq_s64(int64x2_t __p0) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vqdmulhs_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); +__ai int64x1_t vqabs_s64(int64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 3); return __ret; } #else -__ai int32_t vqdmulhs_s32(int32_t __p0, int32_t __p1) { +__ai int64x1_t vqabs_s64(int64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 3); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int8_t vqabsb_s8(int8_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqabsb_s8(__p0); + return __ret; +} +#else +__ai int8_t vqabsb_s8(int8_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqabsb_s8(__p0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32_t vqabss_s32(int32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqabss_s32(__p0); return __ret; } -__ai int32_t __noswap_vqdmulhs_s32(int32_t __p0, int32_t __p1) { +#else +__ai int32_t vqabss_s32(int32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqabss_s32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); +__ai int64_t vqabsd_s64(int64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqabsd_s64(__p0); return __ret; } #else -__ai int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { +__ai int64_t vqabsd_s64(int64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqabsd_s64(__p0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int16_t vqabsh_s16(int16_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); + __ret = (int16_t) __builtin_neon_vqabsh_s16(__p0); return __ret; } -__ai int16_t __noswap_vqdmulhh_s16(int16_t __p0, int16_t __p1) { +#else +__ai int16_t vqabsh_s16(int16_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); + __ret = (int16_t) __builtin_neon_vqabsh_s16(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhs_lane_s32(__p0_158, __p1_158, __p2_158) __extension__ ({ \ - int32_t __s0_158 = __p0_158; \ - int32x2_t __s1_158 = __p1_158; \ - int32_t __ret_158; \ - __ret_158 = vqdmulhs_s32(__s0_158, vget_lane_s32(__s1_158, __p2_158)); \ - __ret_158; \ -}) +__ai uint8_t vqaddb_u8(uint8_t __p0, uint8_t __p1) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vqaddb_u8(__p0, __p1); + return __ret; +} #else -#define vqdmulhs_lane_s32(__p0_159, __p1_159, __p2_159) __extension__ ({ \ - int32_t __s0_159 = __p0_159; \ - int32x2_t __s1_159 = __p1_159; \ - int32x2_t __rev1_159; __rev1_159 = __builtin_shufflevector(__s1_159, __s1_159, 1, 0); \ - int32_t __ret_159; \ - __ret_159 = 
__noswap_vqdmulhs_s32(__s0_159, __noswap_vget_lane_s32(__rev1_159, __p2_159)); \ - __ret_159; \ -}) +__ai uint8_t vqaddb_u8(uint8_t __p0, uint8_t __p1) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vqaddb_u8(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhh_lane_s16(__p0_160, __p1_160, __p2_160) __extension__ ({ \ - int16_t __s0_160 = __p0_160; \ - int16x4_t __s1_160 = __p1_160; \ - int16_t __ret_160; \ - __ret_160 = vqdmulhh_s16(__s0_160, vget_lane_s16(__s1_160, __p2_160)); \ - __ret_160; \ -}) +__ai uint32_t vqadds_u32(uint32_t __p0, uint32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vqadds_u32(__p0, __p1); + return __ret; +} #else -#define vqdmulhh_lane_s16(__p0_161, __p1_161, __p2_161) __extension__ ({ \ - int16_t __s0_161 = __p0_161; \ - int16x4_t __s1_161 = __p1_161; \ - int16x4_t __rev1_161; __rev1_161 = __builtin_shufflevector(__s1_161, __s1_161, 3, 2, 1, 0); \ - int16_t __ret_161; \ - __ret_161 = __noswap_vqdmulhh_s16(__s0_161, __noswap_vget_lane_s16(__rev1_161, __p2_161)); \ - __ret_161; \ -}) +__ai uint32_t vqadds_u32(uint32_t __p0, uint32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vqadds_u32(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhs_laneq_s32(__p0_162, __p1_162, __p2_162) __extension__ ({ \ - int32_t __s0_162 = __p0_162; \ - int32x4_t __s1_162 = __p1_162; \ - int32_t __ret_162; \ - __ret_162 = vqdmulhs_s32(__s0_162, vgetq_lane_s32(__s1_162, __p2_162)); \ - __ret_162; \ -}) +__ai uint64_t vqaddd_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vqaddd_u64(__p0, __p1); + return __ret; +} #else -#define vqdmulhs_laneq_s32(__p0_163, __p1_163, __p2_163) __extension__ ({ \ - int32_t __s0_163 = __p0_163; \ - int32x4_t __s1_163 = __p1_163; \ - int32x4_t __rev1_163; __rev1_163 = __builtin_shufflevector(__s1_163, __s1_163, 3, 2, 1, 0); \ - int32_t __ret_163; \ - __ret_163 = __noswap_vqdmulhs_s32(__s0_163, __noswap_vgetq_lane_s32(__rev1_163, __p2_163)); \ - __ret_163; \ -}) +__ai uint64_t vqaddd_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vqaddd_u64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhh_laneq_s16(__p0_164, __p1_164, __p2_164) __extension__ ({ \ - int16_t __s0_164 = __p0_164; \ - int16x8_t __s1_164 = __p1_164; \ - int16_t __ret_164; \ - __ret_164 = vqdmulhh_s16(__s0_164, vgetq_lane_s16(__s1_164, __p2_164)); \ - __ret_164; \ -}) +__ai uint16_t vqaddh_u16(uint16_t __p0, uint16_t __p1) { + uint16_t __ret; + __ret = (uint16_t) __builtin_neon_vqaddh_u16(__p0, __p1); + return __ret; +} #else -#define vqdmulhh_laneq_s16(__p0_165, __p1_165, __p2_165) __extension__ ({ \ - int16_t __s0_165 = __p0_165; \ - int16x8_t __s1_165 = __p1_165; \ - int16x8_t __rev1_165; __rev1_165 = __builtin_shufflevector(__s1_165, __s1_165, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_165; \ - __ret_165 = __noswap_vqdmulhh_s16(__s0_165, __noswap_vgetq_lane_s16(__rev1_165, __p2_165)); \ - __ret_165; \ -}) +__ai uint16_t vqaddh_u16(uint16_t __p0, uint16_t __p1) { + uint16_t __ret; + __ret = (uint16_t) __builtin_neon_vqaddh_u16(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __ret; \ - __ret = vqdmulhq_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ - __ret; \ -}) +__ai int8_t vqaddb_s8(int8_t __p0, int8_t __p1) 
{ + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqaddb_s8(__p0, __p1); + return __ret; +} #else -#define vqdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqdmulhq_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int8_t vqaddb_s8(int8_t __p0, int8_t __p1) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqaddb_s8(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __ret; \ - __ret = vqdmulhq_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ - __ret; \ -}) +__ai int32_t vqadds_s32(int32_t __p0, int32_t __p1) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1); + return __ret; +} #else -#define vqdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret; \ - __ret = __noswap_vqdmulhq_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int32_t vqadds_s32(int32_t __p0, int32_t __p1) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1); + return __ret; +} +__ai int32_t __noswap_vqadds_s32(int32_t __p0, int32_t __p1) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __ret; \ - __ret = vqdmulh_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ - __ret; \ -}) +__ai int64_t vqaddd_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqaddd_s64(__p0, __p1); + return __ret; +} #else -#define vqdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ - int32x4_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x2_t __ret; \ - __ret = __noswap_vqdmulh_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai int64_t vqaddd_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqaddd_s64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __ret; \ - __ret = vqdmulh_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ - __ret; \ -}) +__ai int16_t vqaddh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1); + return __ret; +} #else -#define vqdmulh_laneq_s16(__p0, __p1, __p2) 
__extension__ ({ \ - int16x4_t __s0 = __p0; \ - int16x8_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret; \ - __ret = __noswap_vqdmulh_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) +__ai int16_t vqaddh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1); + return __ret; +} +__ai int16_t __noswap_vqaddh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vqdmulls_s32(int32_t __p0, int32_t __p1) { +__ai int64_t vqdmlals_s32(int64_t __p0, int32_t __p1, int32_t __p2) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); + __ret = (int64_t) __builtin_neon_vqdmlals_s32(__p0, __p1, __p2); return __ret; } #else -__ai int64_t vqdmulls_s32(int32_t __p0, int32_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); - return __ret; -} -__ai int64_t __noswap_vqdmulls_s32(int32_t __p0, int32_t __p1) { +__ai int64_t vqdmlals_s32(int64_t __p0, int32_t __p1, int32_t __p2) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); + __ret = (int64_t) __builtin_neon_vqdmlals_s32(__p0, __p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vqdmullh_s16(int16_t __p0, int16_t __p1) { +__ai int32_t vqdmlalh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2); return __ret; } #else -__ai int32_t vqdmullh_s16(int16_t __p0, int16_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); - return __ret; -} -__ai int32_t __noswap_vqdmullh_s16(int16_t __p0, int16_t __p1) { +__ai int32_t vqdmlalh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vqdmull_high_s32(int32x4_t __p0, int32x4_t __p1) { +__ai int64x2_t vqdmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; - __ret = vqdmull_s32(vget_high_s32(__p0), vget_high_s32(__p1)); + __ret = vqdmlal_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2)); return __ret; } #else -__ai int64x2_t vqdmull_high_s32(int32x4_t __p0, int32x4_t __p1) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); +__ai int64x2_t vqdmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); int64x2_t __ret; - __ret = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); + __ret = __noswap_vqdmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqdmull_high_s16(int16x8_t __p0, int16x8_t __p1) { +__ai int32x4_t vqdmlal_high_s16(int32x4_t __p0, 
int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; - __ret = vqdmull_s16(vget_high_s16(__p0), vget_high_s16(__p1)); + __ret = vqdmlal_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2)); return __ret; } #else -__ai int32x4_t vqdmull_high_s16(int16x8_t __p0, int16x8_t __p1) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int32x4_t vqdmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); int32x4_t __ret; - __ret = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); + __ret = __noswap_vqdmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int64x2_t __ret; \ - __ret = vqdmull_s32(vget_high_s32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ - __ret; \ -}) -#else -#define vqdmull_high_lane_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ - int32x2_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - int64x2_t __ret; \ - __ret = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int32x4_t __ret; \ - __ret = vqdmull_s16(vget_high_s16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ - __ret; \ -}) -#else -#define vqdmull_high_lane_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ - int16x4_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - int32x4_t __ret; \ - __ret = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ +#define vqdmlal_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ int64x2_t __ret; \ - __ret = vqdmull_s32(vget_high_s32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = vqdmlal_s32(__s0, vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vqdmull_high_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x4_t __s0 = __p0; \ +#define vqdmlal_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x2_t __s2 = __p2; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x4_t __rev1; __rev1 
= __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ int64x2_t __ret; \ - __ret = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __noswap_vqdmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ +#define vqdmlal_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ int32x4_t __ret; \ - __ret = vqdmull_s16(vget_high_s16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret = vqdmlal_s16(__s0, vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vqdmull_high_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x8_t __s0 = __p0; \ +#define vqdmlal_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __s2 = __p2; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __noswap_vqdmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vqdmull_high_n_s32(int32x4_t __p0, int32_t __p1) { - int64x2_t __ret; - __ret = vqdmull_n_s32(vget_high_s32(__p0), __p1); - return __ret; -} -#else -__ai int64x2_t vqdmull_high_n_s32(int32x4_t __p0, int32_t __p1) { - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int64x2_t __ret; - __ret = __noswap_vqdmull_n_s32(__noswap_vget_high_s32(__rev0), __p1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { - int32x4_t __ret; - __ret = vqdmull_n_s16(vget_high_s16(__p0), __p1); - return __ret; -} -#else -__ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int32x4_t __ret; - __ret = __noswap_vqdmull_n_s16(__noswap_vget_high_s16(__rev0), __p1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmulls_lane_s32(__p0_166, __p1_166, __p2_166) __extension__ ({ \ - int32_t __s0_166 = __p0_166; \ - int32x2_t __s1_166 = __p1_166; \ - int64_t __ret_166; \ - __ret_166 = vqdmulls_s32(__s0_166, vget_lane_s32(__s1_166, __p2_166)); \ - __ret_166; \ -}) -#else -#define vqdmulls_lane_s32(__p0_167, __p1_167, __p2_167) __extension__ ({ \ - int32_t __s0_167 = __p0_167; \ - int32x2_t __s1_167 = __p1_167; \ - int32x2_t __rev1_167; __rev1_167 = __builtin_shufflevector(__s1_167, __s1_167, 1, 0); \ - int64_t __ret_167; \ - 
__ret_167 = __noswap_vqdmulls_s32(__s0_167, __noswap_vget_lane_s32(__rev1_167, __p2_167)); \ - __ret_167; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmullh_lane_s16(__p0_168, __p1_168, __p2_168) __extension__ ({ \ - int16_t __s0_168 = __p0_168; \ - int16x4_t __s1_168 = __p1_168; \ - int32_t __ret_168; \ - __ret_168 = vqdmullh_s16(__s0_168, vget_lane_s16(__s1_168, __p2_168)); \ - __ret_168; \ -}) -#else -#define vqdmullh_lane_s16(__p0_169, __p1_169, __p2_169) __extension__ ({ \ - int16_t __s0_169 = __p0_169; \ - int16x4_t __s1_169 = __p1_169; \ - int16x4_t __rev1_169; __rev1_169 = __builtin_shufflevector(__s1_169, __s1_169, 3, 2, 1, 0); \ - int32_t __ret_169; \ - __ret_169 = __noswap_vqdmullh_s16(__s0_169, __noswap_vget_lane_s16(__rev1_169, __p2_169)); \ - __ret_169; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmulls_laneq_s32(__p0_170, __p1_170, __p2_170) __extension__ ({ \ - int32_t __s0_170 = __p0_170; \ - int32x4_t __s1_170 = __p1_170; \ - int64_t __ret_170; \ - __ret_170 = vqdmulls_s32(__s0_170, vgetq_lane_s32(__s1_170, __p2_170)); \ - __ret_170; \ -}) -#else -#define vqdmulls_laneq_s32(__p0_171, __p1_171, __p2_171) __extension__ ({ \ - int32_t __s0_171 = __p0_171; \ - int32x4_t __s1_171 = __p1_171; \ - int32x4_t __rev1_171; __rev1_171 = __builtin_shufflevector(__s1_171, __s1_171, 3, 2, 1, 0); \ - int64_t __ret_171; \ - __ret_171 = __noswap_vqdmulls_s32(__s0_171, __noswap_vgetq_lane_s32(__rev1_171, __p2_171)); \ - __ret_171; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmullh_laneq_s16(__p0_172, __p1_172, __p2_172) __extension__ ({ \ - int16_t __s0_172 = __p0_172; \ - int16x8_t __s1_172 = __p1_172; \ - int32_t __ret_172; \ - __ret_172 = vqdmullh_s16(__s0_172, vgetq_lane_s16(__s1_172, __p2_172)); \ - __ret_172; \ -}) -#else -#define vqdmullh_laneq_s16(__p0_173, __p1_173, __p2_173) __extension__ ({ \ - int16_t __s0_173 = __p0_173; \ - int16x8_t __s1_173 = __p1_173; \ - int16x8_t __rev1_173; __rev1_173 = __builtin_shufflevector(__s1_173, __s1_173, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32_t __ret_173; \ - __ret_173 = __noswap_vqdmullh_s16(__s0_173, __noswap_vgetq_lane_s16(__rev1_173, __p2_173)); \ - __ret_173; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ +#define vqdmlal_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ int64x2_t __ret; \ - __ret = vqdmull_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = vqdmlal_s32(__s0, vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ __ret; \ }) #else -#define vqdmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ - int32x2_t __s0 = __p0; \ +#define vqdmlal_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x4_t __s2 = __p2; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int64x2_t __ret; \ - __ret = __noswap_vqdmull_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __noswap_vqdmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -#define vqdmull_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ +#define vqdmlal_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ int32x4_t __ret; \ - __ret = vqdmull_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret = vqdmlal_s16(__s0, vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ __ret; \ }) #else -#define vqdmull_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ - int16x4_t __s0 = __p0; \ +#define vqdmlal_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x8_t __s2 = __p2; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = __noswap_vqdmull_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __noswap_vqdmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vqmovns_s32(int32_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqmovns_s32(__p0); +__ai int64x2_t vqdmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { + int64x2_t __ret; + __ret = vqdmlal_n_s32(__p0, vget_high_s32(__p1), __p2); return __ret; } #else -__ai int16_t vqmovns_s32(int32_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqmovns_s32(__p0); +__ai int64x2_t vqdmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __ret; + __ret = __noswap_vqdmlal_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vqmovnd_s64(int64_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqmovnd_s64(__p0); +__ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { + int32x4_t __ret; + __ret = vqdmlal_n_s16(__p0, vget_high_s16(__p1), __p2); return __ret; } #else -__ai int32_t vqmovnd_s64(int64_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqmovnd_s64(__p0); +__ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vqdmlal_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vqmovnh_s16(int16_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqmovnh_s16(__p0); - return __ret; -} +#define vqdmlals_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, (int8x8_t)__s2, __p3); 
\ + __ret; \ +}) #else -__ai int8_t vqmovnh_s16(int16_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqmovnh_s16(__p0); - return __ret; -} +#define vqdmlals_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vqmovns_u32(uint32_t __p0) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqmovns_u32(__p0); - return __ret; -} +#define vqdmlalh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret; \ +}) #else -__ai uint16_t vqmovns_u32(uint32_t __p0) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqmovns_u32(__p0); - return __ret; -} +#define vqdmlalh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vqmovnd_u64(uint64_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqmovnd_u64(__p0); - return __ret; -} +#define vqdmlals_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret; \ +}) #else -__ai uint32_t vqmovnd_u64(uint64_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqmovnd_u64(__p0); - return __ret; -} +#define vqdmlals_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8_t vqmovnh_u16(uint16_t __p0) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqmovnh_u16(__p0); - return __ret; -} +#define vqdmlalh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret; \ +}) #else -__ai uint8_t vqmovnh_u16(uint16_t __p0) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqmovnh_u16(__p0); - return __ret; -} +#define vqdmlalh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vqmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { - uint16x8_t __ret; - __ret = vcombine_u16(__p0, vqmovn_u32(__p1)); - return __ret; -} +#define vqdmlal_laneq_s32(__p0, __p1, __p2, 
__p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64x2_t __ret; \ + __ret = vqdmlal_s32(__s0, __s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret; \ +}) #else -__ai uint16x8_t vqmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = __noswap_vcombine_u16(__rev0, __noswap_vqmovn_u32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqdmlal_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int64x2_t __ret; \ + __ret = __noswap_vqdmlal_s32(__rev0, __rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vqmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { - uint32x4_t __ret; - __ret = vcombine_u32(__p0, vqmovn_u64(__p1)); - return __ret; -} +#define vqdmlal_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32x4_t __ret; \ + __ret = vqdmlal_s16(__s0, __s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret; \ +}) #else -__ai uint32x4_t vqmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x4_t __ret; - __ret = __noswap_vcombine_u32(__rev0, __noswap_vqmovn_u64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} +#define vqdmlal_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqdmlal_s16(__rev0, __rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vqmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { - uint8x16_t __ret; - __ret = vcombine_u8(__p0, vqmovn_u16(__p1)); +__ai int64_t vqdmlsls_s32(int64_t __p0, int32_t __p1, int32_t __p2) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2); return __ret; } #else -__ai uint8x16_t vqmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = __noswap_vcombine_u8(__rev0, __noswap_vqmovn_u16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int64_t vqdmlsls_s32(int64_t __p0, int32_t __p1, int32_t __p2) { 
+ int64_t __ret; + __ret = (int64_t) __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vqmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { - int16x8_t __ret; - __ret = vcombine_s16(__p0, vqmovn_s32(__p1)); +__ai int32_t vqdmlslh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2); return __ret; } #else -__ai int16x8_t vqmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x8_t __ret; - __ret = __noswap_vcombine_s16(__rev0, __noswap_vqmovn_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int32_t vqdmlslh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vqmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { - int32x4_t __ret; - __ret = vcombine_s32(__p0, vqmovn_s64(__p1)); +__ai int64x2_t vqdmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { + int64x2_t __ret; + __ret = vqdmlsl_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2)); return __ret; } #else -__ai int32x4_t vqmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x4_t __ret; - __ret = __noswap_vcombine_s32(__rev0, __noswap_vqmovn_s64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai int64x2_t vqdmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int64x2_t __ret; + __ret = __noswap_vqdmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { - int8x16_t __ret; - __ret = vcombine_s8(__p0, vqmovn_s16(__p1)); +__ai int32x4_t vqdmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { + int32x4_t __ret; + __ret = vqdmlsl_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2)); return __ret; } #else -__ai int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int32x4_t vqdmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = __noswap_vcombine_s8(__rev0, __noswap_vqmovn_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vqdmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vqmovuns_s32(int32_t __p0) { - int16_t 
__ret; - __ret = (int16_t) __builtin_neon_vqmovuns_s32(__p0); - return __ret; -} +#define vqdmlsl_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int64x2_t __ret; \ + __ret = vqdmlsl_s32(__s0, vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret; \ +}) #else -__ai int16_t vqmovuns_s32(int32_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqmovuns_s32(__p0); - return __ret; -} +#define vqdmlsl_high_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + int64x2_t __ret; \ + __ret = __noswap_vqdmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vqmovund_s64(int64_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqmovund_s64(__p0); - return __ret; -} +#define vqdmlsl_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int32x4_t __ret; \ + __ret = vqdmlsl_s16(__s0, vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret; \ +}) #else -__ai int32_t vqmovund_s64(int64_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqmovund_s64(__p0); - return __ret; -} +#define vqdmlsl_high_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqdmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vqmovunh_s16(int16_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqmovunh_s16(__p0); - return __ret; -} +#define vqdmlsl_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64x2_t __ret; \ + __ret = vqdmlsl_s32(__s0, vget_high_s32(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret; \ +}) #else -__ai int8_t vqmovunh_s16(int16_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqmovunh_s16(__p0); - return __ret; -} +#define vqdmlsl_high_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int64x2_t __ret; \ + __ret = __noswap_vqdmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 
0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vqmovun_high_s32(int16x4_t __p0, int32x4_t __p1) { - uint16x8_t __ret; - __ret = vcombine_u16((uint16x4_t)(__p0), vqmovun_s32(__p1)); - return __ret; -} +#define vqdmlsl_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32x4_t __ret; \ + __ret = vqdmlsl_s16(__s0, vget_high_s16(__s1), __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret; \ +}) #else -__ai uint16x8_t vqmovun_high_s32(int16x4_t __p0, int32x4_t __p1) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = __noswap_vcombine_u16((uint16x4_t)(__rev0), __noswap_vqmovun_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqdmlsl_high_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqdmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vqmovun_high_s64(int32x2_t __p0, int64x2_t __p1) { - uint32x4_t __ret; - __ret = vcombine_u32((uint32x2_t)(__p0), vqmovun_s64(__p1)); +__ai int64x2_t vqdmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { + int64x2_t __ret; + __ret = vqdmlsl_n_s32(__p0, vget_high_s32(__p1), __p2); return __ret; } #else -__ai uint32x4_t vqmovun_high_s64(int32x2_t __p0, int64x2_t __p1) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x4_t __ret; - __ret = __noswap_vcombine_u32((uint32x2_t)(__rev0), __noswap_vqmovun_s64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai int64x2_t vqdmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __ret; + __ret = __noswap_vqdmlsl_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vqmovun_high_s16(int8x8_t __p0, int16x8_t __p1) { - uint8x16_t __ret; - __ret = vcombine_u8((uint8x8_t)(__p0), vqmovun_s16(__p1)); +__ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { + int32x4_t __ret; + __ret = vqdmlsl_n_s16(__p0, vget_high_s16(__p1), __p2); return __ret; } #else -__ai uint8x16_t vqmovun_high_s16(int8x8_t __p0, int16x8_t __p1) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret 
= __noswap_vcombine_u8((uint8x8_t)(__rev0), __noswap_vqmovun_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vqdmlsl_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vqnegq_s64(int64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 35); - return __ret; -} +#define vqdmlsls_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret; \ +}) #else -__ai int64x2_t vqnegq_s64(int64x2_t __p0) { - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} +#define vqdmlsls_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __s1 = __p1; \ + int32x2_t __s2 = __p2; \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vqneg_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 3); - return __ret; -} +#define vqdmlslh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret; \ +}) #else -__ai int64x1_t vqneg_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 3); - return __ret; -} +#define vqdmlslh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __s1 = __p1; \ + int16x4_t __s2 = __p2; \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vqnegb_s8(int8_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqnegb_s8(__p0); - return __ret; -} +#define vqdmlsls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret; \ +}) #else -__ai int8_t vqnegb_s8(int8_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqnegb_s8(__p0); - return __ret; -} +#define vqdmlsls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vqnegs_s32(int32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqnegs_s32(__p0); - return __ret; -} +#define vqdmlslh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + 
int32_t __s0 = __p0; \ + int16_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret; \ +}) #else -__ai int32_t vqnegs_s32(int32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqnegs_s32(__p0); - return __ret; -} +#define vqdmlslh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vqnegd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqnegd_s64(__p0); - return __ret; -} +#define vqdmlsl_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64x2_t __ret; \ + __ret = vqdmlsl_s32(__s0, __s1, __builtin_shufflevector(__s2, __s2, __p3, __p3)); \ + __ret; \ +}) #else -__ai int64_t vqnegd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqnegd_s64(__p0); - return __ret; -} +#define vqdmlsl_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ + int64x2_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x4_t __s2 = __p2; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ + int64x2_t __ret; \ + __ret = __noswap_vqdmlsl_s32(__rev0, __rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vqnegh_s16(int16_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqnegh_s16(__p0); - return __ret; -} +#define vqdmlsl_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32x4_t __ret; \ + __ret = vqdmlsl_s16(__s0, __s1, __builtin_shufflevector(__s2, __s2, __p3, __p3, __p3, __p3)); \ + __ret; \ +}) #else -__ai int16_t vqnegh_s16(int16_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqnegh_s16(__p0); - return __ret; -} +#define vqdmlsl_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x8_t __s2 = __p2; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqdmlsl_s16(__rev0, __rev1, __builtin_shufflevector(__rev2, __rev2, __p3, __p3, __p3, __p3)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vqrdmulhs_s32(int32_t __p0, int32_t __p1) { +__ai int32_t vqdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); return __ret; } #else -__ai int32_t vqrdmulhs_s32(int32_t __p0, int32_t __p1) { +__ai int32_t vqdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); + 
__ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); return __ret; } -__ai int32_t __noswap_vqrdmulhs_s32(int32_t __p0, int32_t __p1) { +__ai int32_t __noswap_vqdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { +__ai int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); + __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); return __ret; } #else -__ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { +__ai int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); + __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); return __ret; } -__ai int16_t __noswap_vqrdmulhh_s16(int16_t __p0, int16_t __p1) { +__ai int16_t __noswap_vqdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); + __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhs_lane_s32(__p0_174, __p1_174, __p2_174) __extension__ ({ \ - int32_t __s0_174 = __p0_174; \ - int32x2_t __s1_174 = __p1_174; \ - int32_t __ret_174; \ - __ret_174 = vqrdmulhs_s32(__s0_174, vget_lane_s32(__s1_174, __p2_174)); \ - __ret_174; \ +#define vqdmulhs_lane_s32(__p0_158, __p1_158, __p2_158) __extension__ ({ \ + int32_t __s0_158 = __p0_158; \ + int32x2_t __s1_158 = __p1_158; \ + int32_t __ret_158; \ + __ret_158 = vqdmulhs_s32(__s0_158, vget_lane_s32(__s1_158, __p2_158)); \ + __ret_158; \ }) #else -#define vqrdmulhs_lane_s32(__p0_175, __p1_175, __p2_175) __extension__ ({ \ - int32_t __s0_175 = __p0_175; \ - int32x2_t __s1_175 = __p1_175; \ - int32x2_t __rev1_175; __rev1_175 = __builtin_shufflevector(__s1_175, __s1_175, 1, 0); \ - int32_t __ret_175; \ - __ret_175 = __noswap_vqrdmulhs_s32(__s0_175, __noswap_vget_lane_s32(__rev1_175, __p2_175)); \ - __ret_175; \ +#define vqdmulhs_lane_s32(__p0_159, __p1_159, __p2_159) __extension__ ({ \ + int32_t __s0_159 = __p0_159; \ + int32x2_t __s1_159 = __p1_159; \ + int32x2_t __rev1_159; __rev1_159 = __builtin_shufflevector(__s1_159, __s1_159, 1, 0); \ + int32_t __ret_159; \ + __ret_159 = __noswap_vqdmulhs_s32(__s0_159, __noswap_vget_lane_s32(__rev1_159, __p2_159)); \ + __ret_159; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhh_lane_s16(__p0_176, __p1_176, __p2_176) __extension__ ({ \ - int16_t __s0_176 = __p0_176; \ - int16x4_t __s1_176 = __p1_176; \ - int16_t __ret_176; \ - __ret_176 = vqrdmulhh_s16(__s0_176, vget_lane_s16(__s1_176, __p2_176)); \ - __ret_176; \ +#define vqdmulhh_lane_s16(__p0_160, __p1_160, __p2_160) __extension__ ({ \ + int16_t __s0_160 = __p0_160; \ + int16x4_t __s1_160 = __p1_160; \ + int16_t __ret_160; \ + __ret_160 = vqdmulhh_s16(__s0_160, vget_lane_s16(__s1_160, __p2_160)); \ + __ret_160; \ }) #else -#define vqrdmulhh_lane_s16(__p0_177, __p1_177, __p2_177) __extension__ ({ \ - int16_t __s0_177 = __p0_177; \ - int16x4_t __s1_177 = __p1_177; \ - int16x4_t __rev1_177; __rev1_177 = __builtin_shufflevector(__s1_177, __s1_177, 3, 2, 1, 0); \ - int16_t __ret_177; \ - __ret_177 = __noswap_vqrdmulhh_s16(__s0_177, __noswap_vget_lane_s16(__rev1_177, __p2_177)); \ - __ret_177; \ +#define vqdmulhh_lane_s16(__p0_161, __p1_161, __p2_161) __extension__ ({ \ + int16_t __s0_161 = 
__p0_161; \ + int16x4_t __s1_161 = __p1_161; \ + int16x4_t __rev1_161; __rev1_161 = __builtin_shufflevector(__s1_161, __s1_161, 3, 2, 1, 0); \ + int16_t __ret_161; \ + __ret_161 = __noswap_vqdmulhh_s16(__s0_161, __noswap_vget_lane_s16(__rev1_161, __p2_161)); \ + __ret_161; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhs_laneq_s32(__p0_178, __p1_178, __p2_178) __extension__ ({ \ - int32_t __s0_178 = __p0_178; \ - int32x4_t __s1_178 = __p1_178; \ - int32_t __ret_178; \ - __ret_178 = vqrdmulhs_s32(__s0_178, vgetq_lane_s32(__s1_178, __p2_178)); \ - __ret_178; \ +#define vqdmulhs_laneq_s32(__p0_162, __p1_162, __p2_162) __extension__ ({ \ + int32_t __s0_162 = __p0_162; \ + int32x4_t __s1_162 = __p1_162; \ + int32_t __ret_162; \ + __ret_162 = vqdmulhs_s32(__s0_162, vgetq_lane_s32(__s1_162, __p2_162)); \ + __ret_162; \ }) #else -#define vqrdmulhs_laneq_s32(__p0_179, __p1_179, __p2_179) __extension__ ({ \ - int32_t __s0_179 = __p0_179; \ - int32x4_t __s1_179 = __p1_179; \ - int32x4_t __rev1_179; __rev1_179 = __builtin_shufflevector(__s1_179, __s1_179, 3, 2, 1, 0); \ - int32_t __ret_179; \ - __ret_179 = __noswap_vqrdmulhs_s32(__s0_179, __noswap_vgetq_lane_s32(__rev1_179, __p2_179)); \ - __ret_179; \ +#define vqdmulhs_laneq_s32(__p0_163, __p1_163, __p2_163) __extension__ ({ \ + int32_t __s0_163 = __p0_163; \ + int32x4_t __s1_163 = __p1_163; \ + int32x4_t __rev1_163; __rev1_163 = __builtin_shufflevector(__s1_163, __s1_163, 3, 2, 1, 0); \ + int32_t __ret_163; \ + __ret_163 = __noswap_vqdmulhs_s32(__s0_163, __noswap_vgetq_lane_s32(__rev1_163, __p2_163)); \ + __ret_163; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhh_laneq_s16(__p0_180, __p1_180, __p2_180) __extension__ ({ \ - int16_t __s0_180 = __p0_180; \ - int16x8_t __s1_180 = __p1_180; \ - int16_t __ret_180; \ - __ret_180 = vqrdmulhh_s16(__s0_180, vgetq_lane_s16(__s1_180, __p2_180)); \ - __ret_180; \ +#define vqdmulhh_laneq_s16(__p0_164, __p1_164, __p2_164) __extension__ ({ \ + int16_t __s0_164 = __p0_164; \ + int16x8_t __s1_164 = __p1_164; \ + int16_t __ret_164; \ + __ret_164 = vqdmulhh_s16(__s0_164, vgetq_lane_s16(__s1_164, __p2_164)); \ + __ret_164; \ }) #else -#define vqrdmulhh_laneq_s16(__p0_181, __p1_181, __p2_181) __extension__ ({ \ - int16_t __s0_181 = __p0_181; \ - int16x8_t __s1_181 = __p1_181; \ - int16x8_t __rev1_181; __rev1_181 = __builtin_shufflevector(__s1_181, __s1_181, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_181; \ - __ret_181 = __noswap_vqrdmulhh_s16(__s0_181, __noswap_vgetq_lane_s16(__rev1_181, __p2_181)); \ - __ret_181; \ +#define vqdmulhh_laneq_s16(__p0_165, __p1_165, __p2_165) __extension__ ({ \ + int16_t __s0_165 = __p0_165; \ + int16x8_t __s1_165 = __p1_165; \ + int16x8_t __rev1_165; __rev1_165 = __builtin_shufflevector(__s1_165, __s1_165, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_165; \ + __ret_165 = __noswap_vqdmulhh_s16(__s0_165, __noswap_vgetq_lane_s16(__rev1_165, __p2_165)); \ + __ret_165; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vqdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __ret; \ - __ret = vqrdmulhq_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret = vqdmulhq_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ __ret; \ }) #else -#define vqrdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vqdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ 
int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = __noswap_vqrdmulhq_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __noswap_vqdmulhq_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vqdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __ret; \ - __ret = vqrdmulhq_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ + __ret = vqdmulhq_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ __ret; \ }) #else -#define vqrdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vqdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __ret; \ - __ret = __noswap_vqrdmulhq_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ + __ret = __noswap_vqdmulhq_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vqdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x2_t __ret; \ - __ret = vqrdmulh_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ + __ret = vqdmulh_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ __ret; \ }) #else -#define vqrdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ +#define vqdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int32x2_t __ret; \ - __ret = __noswap_vqrdmulh_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __noswap_vqdmulh_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vqdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x4_t __ret; \ - __ret = vqrdmulh_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret = vqdmulh_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ __ret; \ }) #else -#define vqrdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ +#define vqdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __ret; \ - __ret = __noswap_vqrdmulh_s16(__rev0, 
__builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __noswap_vqdmulh_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8_t vqrshlb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); - return __ret; -} -#else -__ai uint8_t vqrshlb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32_t vqrshls_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); - return __ret; -} -#else -__ai uint32_t vqrshls_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint64_t vqrshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); +__ai int64_t vqdmulls_s32(int32_t __p0, int32_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); return __ret; } #else -__ai uint64_t vqrshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint16_t vqrshlh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); +__ai int64_t vqdmulls_s32(int32_t __p0, int32_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); return __ret; } -#else -__ai uint16_t vqrshlh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); +__ai int64_t __noswap_vqdmulls_s32(int32_t __p0, int32_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vqrshlb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqrshlb_s8(__p0, __p1); +__ai int32_t vqdmullh_s16(int16_t __p0, int16_t __p1) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); return __ret; } #else -__ai int8_t vqrshlb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqrshlb_s8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32_t vqrshls_s32(int32_t __p0, int32_t __p1) { +__ai int32_t vqdmullh_s16(int16_t __p0, int16_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrshls_s32(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); return __ret; } -#else -__ai int32_t vqrshls_s32(int32_t __p0, int32_t __p1) { +__ai int32_t __noswap_vqdmullh_s16(int16_t __p0, int16_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrshls_s32(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vqrshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqrshld_s64(__p0, __p1); +__ai int64x2_t vqdmull_high_s32(int32x4_t __p0, int32x4_t __p1) { + int64x2_t __ret; + __ret = vqdmull_s32(vget_high_s32(__p0), vget_high_s32(__p1)); return __ret; } #else -__ai int64_t vqrshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) 
__builtin_neon_vqrshld_s64(__p0, __p1); +__ai int64x2_t vqdmull_high_s32(int32x4_t __p0, int32x4_t __p1) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __ret; + __ret = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrshlh_s16(__p0, __p1); +__ai int32x4_t vqdmull_high_s16(int16x8_t __p0, int16x8_t __p1) { + int32x4_t __ret; + __ret = vqdmull_s16(vget_high_s16(__p0), vget_high_s16(__p1)); return __ret; } #else -__ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrshlh_s16(__p0, __p1); +__ai int32x4_t vqdmull_high_s16(int16x8_t __p0, int16x8_t __p1) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u32(__p0_182, __p1_182, __p2_182) __extension__ ({ \ - uint16x4_t __s0_182 = __p0_182; \ - uint32x4_t __s1_182 = __p1_182; \ - uint16x8_t __ret_182; \ - __ret_182 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_182), (uint16x4_t)(vqrshrn_n_u32(__s1_182, __p2_182)))); \ - __ret_182; \ -}) -#else -#define vqrshrn_high_n_u32(__p0_183, __p1_183, __p2_183) __extension__ ({ \ - uint16x4_t __s0_183 = __p0_183; \ - uint32x4_t __s1_183 = __p1_183; \ - uint16x4_t __rev0_183; __rev0_183 = __builtin_shufflevector(__s0_183, __s0_183, 3, 2, 1, 0); \ - uint32x4_t __rev1_183; __rev1_183 = __builtin_shufflevector(__s1_183, __s1_183, 3, 2, 1, 0); \ - uint16x8_t __ret_183; \ - __ret_183 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_183), (uint16x4_t)(__noswap_vqrshrn_n_u32(__rev1_183, __p2_183)))); \ - __ret_183 = __builtin_shufflevector(__ret_183, __ret_183, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_183; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u64(__p0_184, __p1_184, __p2_184) __extension__ ({ \ - uint32x2_t __s0_184 = __p0_184; \ - uint64x2_t __s1_184 = __p1_184; \ - uint32x4_t __ret_184; \ - __ret_184 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_184), (uint32x2_t)(vqrshrn_n_u64(__s1_184, __p2_184)))); \ - __ret_184; \ -}) -#else -#define vqrshrn_high_n_u64(__p0_185, __p1_185, __p2_185) __extension__ ({ \ - uint32x2_t __s0_185 = __p0_185; \ - uint64x2_t __s1_185 = __p1_185; \ - uint32x2_t __rev0_185; __rev0_185 = __builtin_shufflevector(__s0_185, __s0_185, 1, 0); \ - uint64x2_t __rev1_185; __rev1_185 = __builtin_shufflevector(__s1_185, __s1_185, 1, 0); \ - uint32x4_t __ret_185; \ - __ret_185 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_185), (uint32x2_t)(__noswap_vqrshrn_n_u64(__rev1_185, __p2_185)))); \ - __ret_185 = __builtin_shufflevector(__ret_185, __ret_185, 3, 2, 1, 0); \ - __ret_185; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u16(__p0_186, __p1_186, __p2_186) __extension__ ({ \ - uint8x8_t __s0_186 = __p0_186; \ - uint16x8_t __s1_186 = __p1_186; \ - uint8x16_t __ret_186; \ - __ret_186 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_186), 
(uint8x8_t)(vqrshrn_n_u16(__s1_186, __p2_186)))); \ - __ret_186; \ -}) -#else -#define vqrshrn_high_n_u16(__p0_187, __p1_187, __p2_187) __extension__ ({ \ - uint8x8_t __s0_187 = __p0_187; \ - uint16x8_t __s1_187 = __p1_187; \ - uint8x8_t __rev0_187; __rev0_187 = __builtin_shufflevector(__s0_187, __s0_187, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_187; __rev1_187 = __builtin_shufflevector(__s1_187, __s1_187, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_187; \ - __ret_187 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_187), (uint8x8_t)(__noswap_vqrshrn_n_u16(__rev1_187, __p2_187)))); \ - __ret_187 = __builtin_shufflevector(__ret_187, __ret_187, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_187; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s32(__p0_188, __p1_188, __p2_188) __extension__ ({ \ - int16x4_t __s0_188 = __p0_188; \ - int32x4_t __s1_188 = __p1_188; \ - int16x8_t __ret_188; \ - __ret_188 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_188), (int16x4_t)(vqrshrn_n_s32(__s1_188, __p2_188)))); \ - __ret_188; \ -}) -#else -#define vqrshrn_high_n_s32(__p0_189, __p1_189, __p2_189) __extension__ ({ \ - int16x4_t __s0_189 = __p0_189; \ - int32x4_t __s1_189 = __p1_189; \ - int16x4_t __rev0_189; __rev0_189 = __builtin_shufflevector(__s0_189, __s0_189, 3, 2, 1, 0); \ - int32x4_t __rev1_189; __rev1_189 = __builtin_shufflevector(__s1_189, __s1_189, 3, 2, 1, 0); \ - int16x8_t __ret_189; \ - __ret_189 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_189), (int16x4_t)(__noswap_vqrshrn_n_s32(__rev1_189, __p2_189)))); \ - __ret_189 = __builtin_shufflevector(__ret_189, __ret_189, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_189; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s64(__p0_190, __p1_190, __p2_190) __extension__ ({ \ - int32x2_t __s0_190 = __p0_190; \ - int64x2_t __s1_190 = __p1_190; \ - int32x4_t __ret_190; \ - __ret_190 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_190), (int32x2_t)(vqrshrn_n_s64(__s1_190, __p2_190)))); \ - __ret_190; \ -}) -#else -#define vqrshrn_high_n_s64(__p0_191, __p1_191, __p2_191) __extension__ ({ \ - int32x2_t __s0_191 = __p0_191; \ - int64x2_t __s1_191 = __p1_191; \ - int32x2_t __rev0_191; __rev0_191 = __builtin_shufflevector(__s0_191, __s0_191, 1, 0); \ - int64x2_t __rev1_191; __rev1_191 = __builtin_shufflevector(__s1_191, __s1_191, 1, 0); \ - int32x4_t __ret_191; \ - __ret_191 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_191), (int32x2_t)(__noswap_vqrshrn_n_s64(__rev1_191, __p2_191)))); \ - __ret_191 = __builtin_shufflevector(__ret_191, __ret_191, 3, 2, 1, 0); \ - __ret_191; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s16(__p0_192, __p1_192, __p2_192) __extension__ ({ \ - int8x8_t __s0_192 = __p0_192; \ - int16x8_t __s1_192 = __p1_192; \ - int8x16_t __ret_192; \ - __ret_192 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_192), (int8x8_t)(vqrshrn_n_s16(__s1_192, __p2_192)))); \ - __ret_192; \ -}) -#else -#define vqrshrn_high_n_s16(__p0_193, __p1_193, __p2_193) __extension__ ({ \ - int8x8_t __s0_193 = __p0_193; \ - int16x8_t __s1_193 = __p1_193; \ - int8x8_t __rev0_193; __rev0_193 = __builtin_shufflevector(__s0_193, __s0_193, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_193; __rev1_193 = __builtin_shufflevector(__s1_193, __s1_193, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_193; \ - __ret_193 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_193), (int8x8_t)(__noswap_vqrshrn_n_s16(__rev1_193, __p2_193)))); \ - __ret_193 = __builtin_shufflevector(__ret_193, __ret_193, 15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_193; \ -}) +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ +#define vqdmull_high_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int64x2_t __ret; \ + __ret = vqdmull_s32(vget_high_s32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ __ret; \ }) #else -#define vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ +#define vqdmull_high_lane_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x2_t __s1 = __p1; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + int64x2_t __ret; \ + __ret = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ +#define vqdmull_high_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int32x4_t __ret; \ + __ret = vqdmull_s16(vget_high_s16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ __ret; \ }) #else -#define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ +#define vqdmull_high_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x4_t __s1 = __p1; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ +#define vqdmull_high_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int64x2_t __ret; \ + __ret = vqdmull_s32(vget_high_s32(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ __ret; \ }) #else -#define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ +#define vqdmull_high_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int64x2_t __ret; \ + __ret = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = 
__p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ +#define vqdmull_high_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int32x4_t __ret; \ + __ret = vqdmull_s16(vget_high_s16(__s0), __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ __ret; \ }) #else -#define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ +#define vqdmull_high_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0), __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai int64x2_t vqdmull_high_n_s32(int32x4_t __p0, int32_t __p1) { + int64x2_t __ret; + __ret = vqdmull_n_s32(vget_high_s32(__p0), __p1); + return __ret; +} #else -#define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai int64x2_t vqdmull_high_n_s32(int32x4_t __p0, int32_t __p1) { + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int64x2_t __ret; + __ret = __noswap_vqdmull_n_s32(__noswap_vget_high_s32(__rev0), __p1); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \ - __ret; \ -}) +__ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { + int32x4_t __ret; + __ret = vqdmull_n_s16(vget_high_s16(__p0), __p1); + return __ret; +} #else -#define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \ - __ret; \ -}) +__ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vqdmull_n_s16(__noswap_vget_high_s16(__rev0), __p1); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s32(__p0_194, __p1_194, __p2_194) __extension__ ({ \ - int16x4_t __s0_194 = __p0_194; \ - int32x4_t __s1_194 = __p1_194; \ - int16x8_t __ret_194; \ - __ret_194 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_194), (int16x4_t)(vqrshrun_n_s32(__s1_194, __p2_194)))); \ - __ret_194; \ +#define vqdmulls_lane_s32(__p0_166, __p1_166, __p2_166) __extension__ ({ \ + int32_t __s0_166 = __p0_166; \ + int32x2_t __s1_166 = __p1_166; \ + int64_t __ret_166; \ + __ret_166 = vqdmulls_s32(__s0_166, vget_lane_s32(__s1_166, __p2_166)); \ + __ret_166; \ }) #else -#define vqrshrun_high_n_s32(__p0_195, __p1_195, __p2_195) __extension__ ({ \ - int16x4_t __s0_195 = __p0_195; \ - int32x4_t __s1_195 = __p1_195; \ - 
int16x4_t __rev0_195; __rev0_195 = __builtin_shufflevector(__s0_195, __s0_195, 3, 2, 1, 0); \ - int32x4_t __rev1_195; __rev1_195 = __builtin_shufflevector(__s1_195, __s1_195, 3, 2, 1, 0); \ - int16x8_t __ret_195; \ - __ret_195 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_195), (int16x4_t)(__noswap_vqrshrun_n_s32(__rev1_195, __p2_195)))); \ - __ret_195 = __builtin_shufflevector(__ret_195, __ret_195, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_195; \ +#define vqdmulls_lane_s32(__p0_167, __p1_167, __p2_167) __extension__ ({ \ + int32_t __s0_167 = __p0_167; \ + int32x2_t __s1_167 = __p1_167; \ + int32x2_t __rev1_167; __rev1_167 = __builtin_shufflevector(__s1_167, __s1_167, 1, 0); \ + int64_t __ret_167; \ + __ret_167 = __noswap_vqdmulls_s32(__s0_167, __noswap_vget_lane_s32(__rev1_167, __p2_167)); \ + __ret_167; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s64(__p0_196, __p1_196, __p2_196) __extension__ ({ \ - int32x2_t __s0_196 = __p0_196; \ - int64x2_t __s1_196 = __p1_196; \ - int32x4_t __ret_196; \ - __ret_196 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_196), (int32x2_t)(vqrshrun_n_s64(__s1_196, __p2_196)))); \ - __ret_196; \ +#define vqdmullh_lane_s16(__p0_168, __p1_168, __p2_168) __extension__ ({ \ + int16_t __s0_168 = __p0_168; \ + int16x4_t __s1_168 = __p1_168; \ + int32_t __ret_168; \ + __ret_168 = vqdmullh_s16(__s0_168, vget_lane_s16(__s1_168, __p2_168)); \ + __ret_168; \ }) #else -#define vqrshrun_high_n_s64(__p0_197, __p1_197, __p2_197) __extension__ ({ \ - int32x2_t __s0_197 = __p0_197; \ - int64x2_t __s1_197 = __p1_197; \ - int32x2_t __rev0_197; __rev0_197 = __builtin_shufflevector(__s0_197, __s0_197, 1, 0); \ - int64x2_t __rev1_197; __rev1_197 = __builtin_shufflevector(__s1_197, __s1_197, 1, 0); \ - int32x4_t __ret_197; \ - __ret_197 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_197), (int32x2_t)(__noswap_vqrshrun_n_s64(__rev1_197, __p2_197)))); \ - __ret_197 = __builtin_shufflevector(__ret_197, __ret_197, 3, 2, 1, 0); \ - __ret_197; \ +#define vqdmullh_lane_s16(__p0_169, __p1_169, __p2_169) __extension__ ({ \ + int16_t __s0_169 = __p0_169; \ + int16x4_t __s1_169 = __p1_169; \ + int16x4_t __rev1_169; __rev1_169 = __builtin_shufflevector(__s1_169, __s1_169, 3, 2, 1, 0); \ + int32_t __ret_169; \ + __ret_169 = __noswap_vqdmullh_s16(__s0_169, __noswap_vget_lane_s16(__rev1_169, __p2_169)); \ + __ret_169; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s16(__p0_198, __p1_198, __p2_198) __extension__ ({ \ - int8x8_t __s0_198 = __p0_198; \ - int16x8_t __s1_198 = __p1_198; \ - int8x16_t __ret_198; \ - __ret_198 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_198), (int8x8_t)(vqrshrun_n_s16(__s1_198, __p2_198)))); \ - __ret_198; \ +#define vqdmulls_laneq_s32(__p0_170, __p1_170, __p2_170) __extension__ ({ \ + int32_t __s0_170 = __p0_170; \ + int32x4_t __s1_170 = __p1_170; \ + int64_t __ret_170; \ + __ret_170 = vqdmulls_s32(__s0_170, vgetq_lane_s32(__s1_170, __p2_170)); \ + __ret_170; \ }) #else -#define vqrshrun_high_n_s16(__p0_199, __p1_199, __p2_199) __extension__ ({ \ - int8x8_t __s0_199 = __p0_199; \ - int16x8_t __s1_199 = __p1_199; \ - int8x8_t __rev0_199; __rev0_199 = __builtin_shufflevector(__s0_199, __s0_199, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_199; __rev1_199 = __builtin_shufflevector(__s1_199, __s1_199, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_199; \ - __ret_199 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_199), (int8x8_t)(__noswap_vqrshrun_n_s16(__rev1_199, __p2_199)))); \ - __ret_199 = __builtin_shufflevector(__ret_199, 
__ret_199, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_199; \ +#define vqdmulls_laneq_s32(__p0_171, __p1_171, __p2_171) __extension__ ({ \ + int32_t __s0_171 = __p0_171; \ + int32x4_t __s1_171 = __p1_171; \ + int32x4_t __rev1_171; __rev1_171 = __builtin_shufflevector(__s1_171, __s1_171, 3, 2, 1, 0); \ + int64_t __ret_171; \ + __ret_171 = __noswap_vqdmulls_s32(__s0_171, __noswap_vgetq_lane_s32(__rev1_171, __p2_171)); \ + __ret_171; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \ - __ret; \ +#define vqdmullh_laneq_s16(__p0_172, __p1_172, __p2_172) __extension__ ({ \ + int16_t __s0_172 = __p0_172; \ + int16x8_t __s1_172 = __p1_172; \ + int32_t __ret_172; \ + __ret_172 = vqdmullh_s16(__s0_172, vgetq_lane_s16(__s1_172, __p2_172)); \ + __ret_172; \ }) #else -#define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \ - __ret; \ +#define vqdmullh_laneq_s16(__p0_173, __p1_173, __p2_173) __extension__ ({ \ + int16_t __s0_173 = __p0_173; \ + int16x8_t __s1_173 = __p1_173; \ + int16x8_t __rev1_173; __rev1_173 = __builtin_shufflevector(__s1_173, __s1_173, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32_t __ret_173; \ + __ret_173 = __noswap_vqdmullh_s16(__s0_173, __noswap_vgetq_lane_s16(__rev1_173, __p2_173)); \ + __ret_173; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \ +#define vqdmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int64x2_t __ret; \ + __ret = vqdmull_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ __ret; \ }) #else -#define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \ +#define vqdmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int64x2_t __ret; \ + __ret = __noswap_vqdmull_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \ +#define vqdmull_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int32x4_t __ret; \ + __ret = vqdmull_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ __ret; \ }) #else -#define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \ +#define vqdmull_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqdmull_s16(__rev0, __builtin_shufflevector(__rev1, 
__rev1, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8_t vqshlb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1); +__ai int16_t vqmovns_s32(int32_t __p0) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqmovns_s32(__p0); return __ret; } #else -__ai uint8_t vqshlb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1); +__ai int16_t vqmovns_s32(int32_t __p0) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqmovns_s32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vqshls_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1); +__ai int32_t vqmovnd_s64(int64_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqmovnd_s64(__p0); return __ret; } #else -__ai uint32_t vqshls_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1); +__ai int32_t vqmovnd_s64(int64_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqmovnd_s64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vqshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1); +__ai int8_t vqmovnh_s16(int16_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqmovnh_s16(__p0); return __ret; } #else -__ai uint64_t vqshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1); +__ai int8_t vqmovnh_s16(int16_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqmovnh_s16(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vqshlh_u16(uint16_t __p0, uint16_t __p1) { +__ai uint16_t vqmovns_u32(uint32_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1); + __ret = (uint16_t) __builtin_neon_vqmovns_u32(__p0); return __ret; } #else -__ai uint16_t vqshlh_u16(uint16_t __p0, uint16_t __p1) { +__ai uint16_t vqmovns_u32(uint32_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1); + __ret = (uint16_t) __builtin_neon_vqmovns_u32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vqshlb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, __p1); +__ai uint32_t vqmovnd_u64(uint64_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vqmovnd_u64(__p0); return __ret; } #else -__ai int8_t vqshlb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, __p1); +__ai uint32_t vqmovnd_u64(uint64_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vqmovnd_u64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vqshls_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1); +__ai uint8_t vqmovnh_u16(uint16_t __p0) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vqmovnh_u16(__p0); return __ret; } #else -__ai int32_t vqshls_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1); +__ai uint8_t vqmovnh_u16(uint16_t __p0) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vqmovnh_u16(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vqshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret 
= (int64_t) __builtin_neon_vqshld_s64(__p0, __p1); +__ai uint16x8_t vqmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { + uint16x8_t __ret; + __ret = vcombine_u16(__p0, vqmovn_u32(__p1)); return __ret; } #else -__ai int64_t vqshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqshld_s64(__p0, __p1); +__ai uint16x8_t vqmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = __noswap_vcombine_u16(__rev0, __noswap_vqmovn_u32(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1); +__ai uint32x4_t vqmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { + uint32x4_t __ret; + __ret = vcombine_u32(__p0, vqmovn_u64(__p1)); return __ret; } #else -__ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1); +__ai uint32x4_t vqmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x4_t __ret; + __ret = __noswap_vcombine_u32(__rev0, __noswap_vqmovn_u64(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqshlb_n_u8(__p0, __p1) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlb_n_u8(__p0, __p1) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshls_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshls_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshld_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshld_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlb_n_s8(__p0, __p1) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \ - __ret; \ -}) +__ai uint8x16_t vqmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { + uint8x16_t __ret; + __ret = vcombine_u8(__p0, vqmovn_u16(__p1)); + return __ret; +} #else -#define 
vqshlb_n_s8(__p0, __p1) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \ - __ret; \ -}) +__ai uint8x16_t vqmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = __noswap_vcombine_u8(__rev0, __noswap_vqmovn_u16(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshls_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \ - __ret; \ -}) +__ai int16x8_t vqmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { + int16x8_t __ret; + __ret = vcombine_s16(__p0, vqmovn_s32(__p1)); + return __ret; +} #else -#define vqshls_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \ - __ret; \ -}) +__ai int16x8_t vqmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x8_t __ret; + __ret = __noswap_vcombine_s16(__rev0, __noswap_vqmovn_s32(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshld_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai int32x4_t vqmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { + int32x4_t __ret; + __ret = vcombine_s32(__p0, vqmovn_s64(__p1)); + return __ret; +} #else -#define vqshld_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai int32x4_t vqmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x4_t __ret; + __ret = __noswap_vcombine_s32(__rev0, __noswap_vqmovn_s64(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshlh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \ - __ret; \ -}) +__ai int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { + int8x16_t __ret; + __ret = vcombine_s8(__p0, vqmovn_s16(__p1)); + return __ret; +} #else -#define vqshlh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \ - __ret; \ -}) +__ai int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = __noswap_vcombine_s8(__rev0, __noswap_vqmovn_s16(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshlub_n_s8(__p0, __p1) __extension__ 
({ \ - int8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \ - __ret; \ -}) +__ai int16_t vqmovuns_s32(int32_t __p0) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqmovuns_s32(__p0); + return __ret; +} #else -#define vqshlub_n_s8(__p0, __p1) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \ - __ret; \ -}) +__ai int16_t vqmovuns_s32(int32_t __p0) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqmovuns_s32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshlus_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \ - __ret; \ -}) +__ai int32_t vqmovund_s64(int64_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqmovund_s64(__p0); + return __ret; +} #else -#define vqshlus_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \ - __ret; \ -}) +__ai int32_t vqmovund_s64(int64_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqmovund_s64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshlud_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai int8_t vqmovunh_s16(int16_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqmovunh_s16(__p0); + return __ret; +} #else -#define vqshlud_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai int8_t vqmovunh_s16(int16_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqmovunh_s16(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshluh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \ - __ret; \ -}) +__ai uint16x8_t vqmovun_high_s32(int16x4_t __p0, int32x4_t __p1) { + uint16x8_t __ret; + __ret = vcombine_u16((uint16x4_t)(__p0), vqmovun_s32(__p1)); + return __ret; +} #else -#define vqshluh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \ - __ret; \ -}) +__ai uint16x8_t vqmovun_high_s32(int16x4_t __p0, int32x4_t __p1) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = __noswap_vcombine_u16((uint16x4_t)(__rev0), __noswap_vqmovun_s32(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u32(__p0_200, __p1_200, __p2_200) __extension__ ({ \ - uint16x4_t __s0_200 = __p0_200; \ - uint32x4_t __s1_200 = __p1_200; \ - uint16x8_t __ret_200; \ - __ret_200 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_200), (uint16x4_t)(vqshrn_n_u32(__s1_200, __p2_200)))); \ - __ret_200; \ -}) +__ai uint32x4_t vqmovun_high_s64(int32x2_t __p0, int64x2_t __p1) { + uint32x4_t __ret; + __ret = vcombine_u32((uint32x2_t)(__p0), vqmovun_s64(__p1)); + return __ret; +} #else -#define vqshrn_high_n_u32(__p0_201, __p1_201, __p2_201) __extension__ ({ \ - uint16x4_t __s0_201 = __p0_201; \ - uint32x4_t __s1_201 = __p1_201; \ - uint16x4_t 
__rev0_201; __rev0_201 = __builtin_shufflevector(__s0_201, __s0_201, 3, 2, 1, 0); \ - uint32x4_t __rev1_201; __rev1_201 = __builtin_shufflevector(__s1_201, __s1_201, 3, 2, 1, 0); \ - uint16x8_t __ret_201; \ - __ret_201 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_201), (uint16x4_t)(__noswap_vqshrn_n_u32(__rev1_201, __p2_201)))); \ - __ret_201 = __builtin_shufflevector(__ret_201, __ret_201, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_201; \ -}) +__ai uint32x4_t vqmovun_high_s64(int32x2_t __p0, int64x2_t __p1) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x4_t __ret; + __ret = __noswap_vcombine_u32((uint32x2_t)(__rev0), __noswap_vqmovun_s64(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u64(__p0_202, __p1_202, __p2_202) __extension__ ({ \ - uint32x2_t __s0_202 = __p0_202; \ - uint64x2_t __s1_202 = __p1_202; \ - uint32x4_t __ret_202; \ - __ret_202 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_202), (uint32x2_t)(vqshrn_n_u64(__s1_202, __p2_202)))); \ - __ret_202; \ -}) +__ai uint8x16_t vqmovun_high_s16(int8x8_t __p0, int16x8_t __p1) { + uint8x16_t __ret; + __ret = vcombine_u8((uint8x8_t)(__p0), vqmovun_s16(__p1)); + return __ret; +} #else -#define vqshrn_high_n_u64(__p0_203, __p1_203, __p2_203) __extension__ ({ \ - uint32x2_t __s0_203 = __p0_203; \ - uint64x2_t __s1_203 = __p1_203; \ - uint32x2_t __rev0_203; __rev0_203 = __builtin_shufflevector(__s0_203, __s0_203, 1, 0); \ - uint64x2_t __rev1_203; __rev1_203 = __builtin_shufflevector(__s1_203, __s1_203, 1, 0); \ - uint32x4_t __ret_203; \ - __ret_203 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_203), (uint32x2_t)(__noswap_vqshrn_n_u64(__rev1_203, __p2_203)))); \ - __ret_203 = __builtin_shufflevector(__ret_203, __ret_203, 3, 2, 1, 0); \ - __ret_203; \ -}) +__ai uint8x16_t vqmovun_high_s16(int8x8_t __p0, int16x8_t __p1) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = __noswap_vcombine_u8((uint8x8_t)(__rev0), __noswap_vqmovun_s16(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u16(__p0_204, __p1_204, __p2_204) __extension__ ({ \ - uint8x8_t __s0_204 = __p0_204; \ - uint16x8_t __s1_204 = __p1_204; \ - uint8x16_t __ret_204; \ - __ret_204 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_204), (uint8x8_t)(vqshrn_n_u16(__s1_204, __p2_204)))); \ - __ret_204; \ -}) +__ai int64x2_t vqnegq_s64(int64x2_t __p0) { + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 35); + return __ret; +} #else -#define vqshrn_high_n_u16(__p0_205, __p1_205, __p2_205) __extension__ ({ \ - uint8x8_t __s0_205 = __p0_205; \ - uint16x8_t __s1_205 = __p1_205; \ - uint8x8_t __rev0_205; __rev0_205 = __builtin_shufflevector(__s0_205, __s0_205, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_205; __rev1_205 = __builtin_shufflevector(__s1_205, __s1_205, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_205; \ - __ret_205 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_205), (uint8x8_t)(__noswap_vqshrn_n_u16(__rev1_205, __p2_205)))); \ - __ret_205 = __builtin_shufflevector(__ret_205, __ret_205, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); 
\ - __ret_205; \ -}) +__ai int64x2_t vqnegq_s64(int64x2_t __p0) { + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __ret; + __ret = (int64x2_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 35); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s32(__p0_206, __p1_206, __p2_206) __extension__ ({ \ - int16x4_t __s0_206 = __p0_206; \ - int32x4_t __s1_206 = __p1_206; \ - int16x8_t __ret_206; \ - __ret_206 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_206), (int16x4_t)(vqshrn_n_s32(__s1_206, __p2_206)))); \ - __ret_206; \ -}) +__ai int64x1_t vqneg_s64(int64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 3); + return __ret; +} #else -#define vqshrn_high_n_s32(__p0_207, __p1_207, __p2_207) __extension__ ({ \ - int16x4_t __s0_207 = __p0_207; \ - int32x4_t __s1_207 = __p1_207; \ - int16x4_t __rev0_207; __rev0_207 = __builtin_shufflevector(__s0_207, __s0_207, 3, 2, 1, 0); \ - int32x4_t __rev1_207; __rev1_207 = __builtin_shufflevector(__s1_207, __s1_207, 3, 2, 1, 0); \ - int16x8_t __ret_207; \ - __ret_207 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_207), (int16x4_t)(__noswap_vqshrn_n_s32(__rev1_207, __p2_207)))); \ - __ret_207 = __builtin_shufflevector(__ret_207, __ret_207, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_207; \ -}) +__ai int64x1_t vqneg_s64(int64x1_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 3); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s64(__p0_208, __p1_208, __p2_208) __extension__ ({ \ - int32x2_t __s0_208 = __p0_208; \ - int64x2_t __s1_208 = __p1_208; \ - int32x4_t __ret_208; \ - __ret_208 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_208), (int32x2_t)(vqshrn_n_s64(__s1_208, __p2_208)))); \ - __ret_208; \ -}) +__ai int8_t vqnegb_s8(int8_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqnegb_s8(__p0); + return __ret; +} #else -#define vqshrn_high_n_s64(__p0_209, __p1_209, __p2_209) __extension__ ({ \ - int32x2_t __s0_209 = __p0_209; \ - int64x2_t __s1_209 = __p1_209; \ - int32x2_t __rev0_209; __rev0_209 = __builtin_shufflevector(__s0_209, __s0_209, 1, 0); \ - int64x2_t __rev1_209; __rev1_209 = __builtin_shufflevector(__s1_209, __s1_209, 1, 0); \ - int32x4_t __ret_209; \ - __ret_209 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_209), (int32x2_t)(__noswap_vqshrn_n_s64(__rev1_209, __p2_209)))); \ - __ret_209 = __builtin_shufflevector(__ret_209, __ret_209, 3, 2, 1, 0); \ - __ret_209; \ -}) +__ai int8_t vqnegb_s8(int8_t __p0) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqnegb_s8(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s16(__p0_210, __p1_210, __p2_210) __extension__ ({ \ - int8x8_t __s0_210 = __p0_210; \ - int16x8_t __s1_210 = __p1_210; \ - int8x16_t __ret_210; \ - __ret_210 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_210), (int8x8_t)(vqshrn_n_s16(__s1_210, __p2_210)))); \ - __ret_210; \ -}) +__ai int32_t vqnegs_s32(int32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqnegs_s32(__p0); + return __ret; +} #else -#define vqshrn_high_n_s16(__p0_211, __p1_211, __p2_211) __extension__ ({ \ - int8x8_t __s0_211 = __p0_211; \ - int16x8_t __s1_211 = __p1_211; \ - int8x8_t __rev0_211; __rev0_211 = __builtin_shufflevector(__s0_211, __s0_211, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_211; __rev1_211 = __builtin_shufflevector(__s1_211, __s1_211, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t 
__ret_211; \ - __ret_211 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_211), (int8x8_t)(__noswap_vqshrn_n_s16(__rev1_211, __p2_211)))); \ - __ret_211 = __builtin_shufflevector(__ret_211, __ret_211, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_211; \ -}) +__ai int32_t vqnegs_s32(int32_t __p0) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqnegs_s32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrns_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \ - __ret; \ -}) +__ai int64_t vqnegd_s64(int64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqnegd_s64(__p0); + return __ret; +} #else -#define vqshrns_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \ - __ret; \ -}) +__ai int64_t vqnegd_s64(int64_t __p0) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqnegd_s64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \ - __ret; \ -}) +__ai int16_t vqnegh_s16(int16_t __p0) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqnegh_s16(__p0); + return __ret; +} #else -#define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \ - __ret; \ -}) +__ai int16_t vqnegh_s16(int16_t __p0) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqnegh_s16(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \ - __ret; \ -}) +__ai int32_t vqrdmulhs_s32(int32_t __p0, int32_t __p1) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); + return __ret; +} #else -#define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \ - __ret; \ -}) +__ai int32_t vqrdmulhs_s32(int32_t __p0, int32_t __p1) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); + return __ret; +} +__ai int32_t __noswap_vqrdmulhs_s32(int32_t __p0, int32_t __p1) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \ - __ret; \ -}) +__ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); + return __ret; +} #else -#define vqshrns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \ - __ret; \ -}) +__ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); + return __ret; +} +__ai int16_t __noswap_vqrdmulhh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrnd_n_s64(__p0, __p1) __extension__ ({ \ - 
int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \ - __ret; \ +#define vqrdmulhs_lane_s32(__p0_174, __p1_174, __p2_174) __extension__ ({ \ + int32_t __s0_174 = __p0_174; \ + int32x2_t __s1_174 = __p1_174; \ + int32_t __ret_174; \ + __ret_174 = vqrdmulhs_s32(__s0_174, vget_lane_s32(__s1_174, __p2_174)); \ + __ret_174; \ }) #else -#define vqshrnd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \ - __ret; \ +#define vqrdmulhs_lane_s32(__p0_175, __p1_175, __p2_175) __extension__ ({ \ + int32_t __s0_175 = __p0_175; \ + int32x2_t __s1_175 = __p1_175; \ + int32x2_t __rev1_175; __rev1_175 = __builtin_shufflevector(__s1_175, __s1_175, 1, 0); \ + int32_t __ret_175; \ + __ret_175 = __noswap_vqrdmulhs_s32(__s0_175, __noswap_vget_lane_s32(__rev1_175, __p2_175)); \ + __ret_175; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \ - __ret; \ +#define vqrdmulhh_lane_s16(__p0_176, __p1_176, __p2_176) __extension__ ({ \ + int16_t __s0_176 = __p0_176; \ + int16x4_t __s1_176 = __p1_176; \ + int16_t __ret_176; \ + __ret_176 = vqrdmulhh_s16(__s0_176, vget_lane_s16(__s1_176, __p2_176)); \ + __ret_176; \ }) #else -#define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \ - __ret; \ +#define vqrdmulhh_lane_s16(__p0_177, __p1_177, __p2_177) __extension__ ({ \ + int16_t __s0_177 = __p0_177; \ + int16x4_t __s1_177 = __p1_177; \ + int16x4_t __rev1_177; __rev1_177 = __builtin_shufflevector(__s1_177, __s1_177, 3, 2, 1, 0); \ + int16_t __ret_177; \ + __ret_177 = __noswap_vqrdmulhh_s16(__s0_177, __noswap_vget_lane_s16(__rev1_177, __p2_177)); \ + __ret_177; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s32(__p0_212, __p1_212, __p2_212) __extension__ ({ \ - int16x4_t __s0_212 = __p0_212; \ - int32x4_t __s1_212 = __p1_212; \ - int16x8_t __ret_212; \ - __ret_212 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_212), (int16x4_t)(vqshrun_n_s32(__s1_212, __p2_212)))); \ - __ret_212; \ +#define vqrdmulhs_laneq_s32(__p0_178, __p1_178, __p2_178) __extension__ ({ \ + int32_t __s0_178 = __p0_178; \ + int32x4_t __s1_178 = __p1_178; \ + int32_t __ret_178; \ + __ret_178 = vqrdmulhs_s32(__s0_178, vgetq_lane_s32(__s1_178, __p2_178)); \ + __ret_178; \ }) #else -#define vqshrun_high_n_s32(__p0_213, __p1_213, __p2_213) __extension__ ({ \ - int16x4_t __s0_213 = __p0_213; \ - int32x4_t __s1_213 = __p1_213; \ - int16x4_t __rev0_213; __rev0_213 = __builtin_shufflevector(__s0_213, __s0_213, 3, 2, 1, 0); \ - int32x4_t __rev1_213; __rev1_213 = __builtin_shufflevector(__s1_213, __s1_213, 3, 2, 1, 0); \ - int16x8_t __ret_213; \ - __ret_213 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_213), (int16x4_t)(__noswap_vqshrun_n_s32(__rev1_213, __p2_213)))); \ - __ret_213 = __builtin_shufflevector(__ret_213, __ret_213, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_213; \ +#define vqrdmulhs_laneq_s32(__p0_179, __p1_179, __p2_179) __extension__ ({ \ + int32_t __s0_179 = __p0_179; \ + int32x4_t __s1_179 = __p1_179; \ + int32x4_t __rev1_179; __rev1_179 = __builtin_shufflevector(__s1_179, __s1_179, 3, 2, 1, 0); \ + int32_t __ret_179; \ + __ret_179 = __noswap_vqrdmulhs_s32(__s0_179, __noswap_vgetq_lane_s32(__rev1_179, __p2_179)); \ + __ret_179; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -#define vqshrun_high_n_s64(__p0_214, __p1_214, __p2_214) __extension__ ({ \ - int32x2_t __s0_214 = __p0_214; \ - int64x2_t __s1_214 = __p1_214; \ - int32x4_t __ret_214; \ - __ret_214 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_214), (int32x2_t)(vqshrun_n_s64(__s1_214, __p2_214)))); \ - __ret_214; \ +#define vqrdmulhh_laneq_s16(__p0_180, __p1_180, __p2_180) __extension__ ({ \ + int16_t __s0_180 = __p0_180; \ + int16x8_t __s1_180 = __p1_180; \ + int16_t __ret_180; \ + __ret_180 = vqrdmulhh_s16(__s0_180, vgetq_lane_s16(__s1_180, __p2_180)); \ + __ret_180; \ }) #else -#define vqshrun_high_n_s64(__p0_215, __p1_215, __p2_215) __extension__ ({ \ - int32x2_t __s0_215 = __p0_215; \ - int64x2_t __s1_215 = __p1_215; \ - int32x2_t __rev0_215; __rev0_215 = __builtin_shufflevector(__s0_215, __s0_215, 1, 0); \ - int64x2_t __rev1_215; __rev1_215 = __builtin_shufflevector(__s1_215, __s1_215, 1, 0); \ - int32x4_t __ret_215; \ - __ret_215 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_215), (int32x2_t)(__noswap_vqshrun_n_s64(__rev1_215, __p2_215)))); \ - __ret_215 = __builtin_shufflevector(__ret_215, __ret_215, 3, 2, 1, 0); \ - __ret_215; \ +#define vqrdmulhh_laneq_s16(__p0_181, __p1_181, __p2_181) __extension__ ({ \ + int16_t __s0_181 = __p0_181; \ + int16x8_t __s1_181 = __p1_181; \ + int16x8_t __rev1_181; __rev1_181 = __builtin_shufflevector(__s1_181, __s1_181, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_181; \ + __ret_181 = __noswap_vqrdmulhh_s16(__s0_181, __noswap_vgetq_lane_s16(__rev1_181, __p2_181)); \ + __ret_181; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s16(__p0_216, __p1_216, __p2_216) __extension__ ({ \ - int8x8_t __s0_216 = __p0_216; \ - int16x8_t __s1_216 = __p1_216; \ - int8x16_t __ret_216; \ - __ret_216 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_216), (int8x8_t)(vqshrun_n_s16(__s1_216, __p2_216)))); \ - __ret_216; \ -}) -#else -#define vqshrun_high_n_s16(__p0_217, __p1_217, __p2_217) __extension__ ({ \ - int8x8_t __s0_217 = __p0_217; \ - int16x8_t __s1_217 = __p1_217; \ - int8x8_t __rev0_217; __rev0_217 = __builtin_shufflevector(__s0_217, __s0_217, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_217; __rev1_217 = __builtin_shufflevector(__s1_217, __s1_217, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_217; \ - __ret_217 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_217), (int8x8_t)(__noswap_vqshrun_n_s16(__rev1_217, __p2_217)))); \ - __ret_217 = __builtin_shufflevector(__ret_217, __ret_217, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_217; \ +#define vqrdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __ret; \ + __ret = vqrdmulhq_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ + __ret; \ +}) +#else +#define vqrdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x4_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x4_t __ret; \ + __ret = __noswap_vqrdmulhq_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshruns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshruns_n_s32(__s0, __p1); \ +#define vqrdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ 
\ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __ret; \ + __ret = vqrdmulhq_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ __ret; \ }) #else -#define vqshruns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshruns_n_s32(__s0, __p1); \ +#define vqrdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret; \ + __ret = __noswap_vqrdmulhq_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrund_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshrund_n_s64(__s0, __p1); \ +#define vqrdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __ret; \ + __ret = vqrdmulh_s32(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2)); \ __ret; \ }) #else -#define vqshrund_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshrund_n_s64(__s0, __p1); \ +#define vqrdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ + int32x2_t __s0 = __p0; \ + int32x4_t __s1 = __p1; \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + int32x2_t __ret; \ + __ret = __noswap_vqrdmulh_s32(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrunh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshrunh_n_s16(__s0, __p1); \ +#define vqrdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __ret; \ + __ret = vqrdmulh_s16(__s0, __builtin_shufflevector(__s1, __s1, __p2, __p2, __p2, __p2)); \ __ret; \ }) #else -#define vqshrunh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshrunh_n_s16(__s0, __p1); \ +#define vqrdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x4_t __s0 = __p0; \ + int16x8_t __s1 = __p1; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret; \ + __ret = __noswap_vqrdmulh_s16(__rev0, __builtin_shufflevector(__rev1, __rev1, __p2, __p2, __p2, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8_t vqsubb_u8(uint8_t __p0, uint8_t __p1) { +__ai uint8_t vqrshlb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqsubb_u8(__p0, __p1); + __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); return __ret; } #else -__ai uint8_t vqsubb_u8(uint8_t __p0, uint8_t __p1) { +__ai uint8_t vqrshlb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqsubb_u8(__p0, 
__p1); + __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vqsubs_u32(uint32_t __p0, uint32_t __p1) { +__ai uint32_t vqrshls_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqsubs_u32(__p0, __p1); + __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); return __ret; } #else -__ai uint32_t vqsubs_u32(uint32_t __p0, uint32_t __p1) { +__ai uint32_t vqrshls_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqsubs_u32(__p0, __p1); + __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vqsubd_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vqrshld_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqsubd_u64(__p0, __p1); + __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); return __ret; } #else -__ai uint64_t vqsubd_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vqrshld_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqsubd_u64(__p0, __p1); + __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vqsubh_u16(uint16_t __p0, uint16_t __p1) { +__ai uint16_t vqrshlh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqsubh_u16(__p0, __p1); + __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); return __ret; } #else -__ai uint16_t vqsubh_u16(uint16_t __p0, uint16_t __p1) { +__ai uint16_t vqrshlh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqsubh_u16(__p0, __p1); + __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vqsubb_s8(int8_t __p0, int8_t __p1) { +__ai int8_t vqrshlb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vqsubb_s8(__p0, __p1); + __ret = (int8_t) __builtin_neon_vqrshlb_s8(__p0, __p1); return __ret; } #else -__ai int8_t vqsubb_s8(int8_t __p0, int8_t __p1) { +__ai int8_t vqrshlb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vqsubb_s8(__p0, __p1); + __ret = (int8_t) __builtin_neon_vqrshlb_s8(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vqsubs_s32(int32_t __p0, int32_t __p1) { +__ai int32_t vqrshls_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqrshls_s32(__p0, __p1); return __ret; } #else -__ai int32_t vqsubs_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1); - return __ret; -} -__ai int32_t __noswap_vqsubs_s32(int32_t __p0, int32_t __p1) { +__ai int32_t vqrshls_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1); + __ret = (int32_t) __builtin_neon_vqrshls_s32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vqsubd_s64(int64_t __p0, int64_t __p1) { +__ai int64_t vqrshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqsubd_s64(__p0, __p1); + __ret = (int64_t) __builtin_neon_vqrshld_s64(__p0, __p1); return __ret; } #else -__ai int64_t vqsubd_s64(int64_t __p0, int64_t __p1) { +__ai int64_t vqrshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqsubd_s64(__p0, __p1); + 
__ret = (int64_t) __builtin_neon_vqrshld_s64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vqsubh_s16(int16_t __p0, int16_t __p1) { +__ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); + __ret = (int16_t) __builtin_neon_vqrshlh_s16(__p0, __p1); return __ret; } #else -__ai int16_t vqsubh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); - return __ret; -} -__ai int16_t __noswap_vqsubh_s16(int16_t __p0, int16_t __p1) { +__ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { - poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 4); - return __ret; -} -#else -__ai poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint8x16_t __p1) { - poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 36); - return __ret; -} -#else -__ai poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint8x16_t __p1) { - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 48); - return __ret; -} -#else -__ai uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 32); - return __ret; -} -#else -__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, int8x16_t __p1) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = (int8x16_t) 
__builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (int16_t) __builtin_neon_vqrshlh_s16(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 16); - return __ret; -} +#define vqrshrn_high_n_u32(__p0_182, __p1_182, __p2_182) __extension__ ({ \ + uint16x4_t __s0_182 = __p0_182; \ + uint32x4_t __s1_182 = __p1_182; \ + uint16x8_t __ret_182; \ + __ret_182 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_182), (uint16x4_t)(vqrshrn_n_u32(__s1_182, __p2_182)))); \ + __ret_182; \ +}) #else -__ai uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrn_high_n_u32(__p0_183, __p1_183, __p2_183) __extension__ ({ \ + uint16x4_t __s0_183 = __p0_183; \ + uint32x4_t __s1_183 = __p1_183; \ + uint16x4_t __rev0_183; __rev0_183 = __builtin_shufflevector(__s0_183, __s0_183, 3, 2, 1, 0); \ + uint32x4_t __rev1_183; __rev1_183 = __builtin_shufflevector(__s1_183, __s1_183, 3, 2, 1, 0); \ + uint16x8_t __ret_183; \ + __ret_183 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_183), (uint16x4_t)(__noswap_vqrshrn_n_u32(__rev1_183, __p2_183)))); \ + __ret_183 = __builtin_shufflevector(__ret_183, __ret_183, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_183; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbl1_s8(int8x16_t __p0, int8x8_t __p1) { - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 0); - return __ret; -} +#define vqrshrn_high_n_u64(__p0_184, __p1_184, __p2_184) __extension__ ({ \ + uint32x2_t __s0_184 = __p0_184; \ + uint64x2_t __s1_184 = __p1_184; \ + uint32x4_t __ret_184; \ + __ret_184 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_184), (uint32x2_t)(vqrshrn_n_u64(__s1_184, __p2_184)))); \ + __ret_184; \ +}) #else -__ai int8x8_t vqtbl1_s8(int8x16_t __p0, int8x8_t __p1) { - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrn_high_n_u64(__p0_185, __p1_185, __p2_185) __extension__ ({ \ + uint32x2_t __s0_185 = __p0_185; \ + uint64x2_t __s1_185 = __p1_185; \ + uint32x2_t __rev0_185; __rev0_185 = __builtin_shufflevector(__s0_185, __s0_185, 1, 0); \ + uint64x2_t __rev1_185; __rev1_185 = __builtin_shufflevector(__s1_185, __s1_185, 1, 0); \ + uint32x4_t __ret_185; \ + __ret_185 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_185), (uint32x2_t)(__noswap_vqrshrn_n_u64(__rev1_185, __p2_185)))); \ + __ret_185 = __builtin_shufflevector(__ret_185, __ret_185, 3, 2, 1, 0); \ + __ret_185; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint8x8_t __p1) { - poly8x8_t 
__ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 4); - return __ret; -} +#define vqrshrn_high_n_u16(__p0_186, __p1_186, __p2_186) __extension__ ({ \ + uint8x8_t __s0_186 = __p0_186; \ + uint16x8_t __s1_186 = __p1_186; \ + uint8x16_t __ret_186; \ + __ret_186 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_186), (uint8x8_t)(vqrshrn_n_u16(__s1_186, __p2_186)))); \ + __ret_186; \ +}) #else -__ai poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint8x8_t __p1) { - poly8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrn_high_n_u16(__p0_187, __p1_187, __p2_187) __extension__ ({ \ + uint8x8_t __s0_187 = __p0_187; \ + uint16x8_t __s1_187 = __p1_187; \ + uint8x8_t __rev0_187; __rev0_187 = __builtin_shufflevector(__s0_187, __s0_187, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_187; __rev1_187 = __builtin_shufflevector(__s1_187, __s1_187, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_187; \ + __ret_187 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_187), (uint8x8_t)(__noswap_vqrshrn_n_u16(__rev1_187, __p2_187)))); \ + __ret_187 = __builtin_shufflevector(__ret_187, __ret_187, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_187; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, uint8x16_t __p1) { - poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 36); - return __ret; -} +#define vqrshrn_high_n_s32(__p0_188, __p1_188, __p2_188) __extension__ ({ \ + int16x4_t __s0_188 = __p0_188; \ + int32x4_t __s1_188 = __p1_188; \ + int16x8_t __ret_188; \ + __ret_188 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_188), (int16x4_t)(vqrshrn_n_s32(__s1_188, __p2_188)))); \ + __ret_188; \ +}) #else -__ai poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, uint8x16_t __p1) { - poly8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrn_high_n_s32(__p0_189, __p1_189, __p2_189) __extension__ ({ \ + int16x4_t __s0_189 = __p0_189; \ + int32x4_t __s1_189 = __p1_189; \ + int16x4_t __rev0_189; __rev0_189 = __builtin_shufflevector(__s0_189, __s0_189, 3, 2, 1, 0); \ + int32x4_t __rev1_189; __rev1_189 = __builtin_shufflevector(__s1_189, __s1_189, 3, 2, 1, 0); \ + int16x8_t __ret_189; \ + __ret_189 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_189), (int16x4_t)(__noswap_vqrshrn_n_s32(__rev1_189, __p2_189)))); 
\ + __ret_189 = __builtin_shufflevector(__ret_189, __ret_189, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_189; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 48); - return __ret; -} +#define vqrshrn_high_n_s64(__p0_190, __p1_190, __p2_190) __extension__ ({ \ + int32x2_t __s0_190 = __p0_190; \ + int64x2_t __s1_190 = __p1_190; \ + int32x4_t __ret_190; \ + __ret_190 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_190), (int32x2_t)(vqrshrn_n_s64(__s1_190, __p2_190)))); \ + __ret_190; \ +}) #else -__ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { - uint8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrn_high_n_s64(__p0_191, __p1_191, __p2_191) __extension__ ({ \ + int32x2_t __s0_191 = __p0_191; \ + int64x2_t __s1_191 = __p1_191; \ + int32x2_t __rev0_191; __rev0_191 = __builtin_shufflevector(__s0_191, __s0_191, 1, 0); \ + int64x2_t __rev1_191; __rev1_191 = __builtin_shufflevector(__s1_191, __s1_191, 1, 0); \ + int32x4_t __ret_191; \ + __ret_191 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_191), (int32x2_t)(__noswap_vqrshrn_n_s64(__rev1_191, __p2_191)))); \ + __ret_191 = __builtin_shufflevector(__ret_191, __ret_191, 3, 2, 1, 0); \ + __ret_191; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, int8x16_t __p1) { - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 32); - return __ret; -} +#define vqrshrn_high_n_s16(__p0_192, __p1_192, __p2_192) __extension__ ({ \ + int8x8_t __s0_192 = __p0_192; \ + int16x8_t __s1_192 = __p1_192; \ + int8x16_t __ret_192; \ + __ret_192 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_192), (int8x8_t)(vqrshrn_n_s16(__s1_192, __p2_192)))); \ + __ret_192; \ +}) #else -__ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, int8x16_t __p1) { - int8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrn_high_n_s16(__p0_193, __p1_193, __p2_193) __extension__ ({ \ + int8x8_t __s0_193 = __p0_193; \ + int16x8_t __s1_193 = __p1_193; \ + int8x8_t __rev0_193; __rev0_193 = __builtin_shufflevector(__s0_193, __s0_193, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_193; __rev1_193 = 
__builtin_shufflevector(__s1_193, __s1_193, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_193; \ + __ret_193 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_193), (int8x8_t)(__noswap_vqrshrn_n_s16(__rev1_193, __p2_193)))); \ + __ret_193 = __builtin_shufflevector(__ret_193, __ret_193, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_193; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 16); - return __ret; -} +#define vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ + __ret; \ +}) #else -__ai uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { - uint8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, int8x8_t __p1) { - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 0); - return __ret; -} +#define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ + __ret; \ +}) #else -__ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, int8x8_t __p1) { - int8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, uint8x8_t __p1) { - poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 4); - return __ret; -} +#define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ + __ret; \ +}) #else -__ai poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, uint8x8_t __p1) { - poly8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 
1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, uint8x16_t __p1) { - poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 36); - return __ret; -} +#define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __ret; \ + __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ + __ret; \ +}) #else -__ai poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, uint8x16_t __p1) { - poly8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} +#define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __ret; \ + __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 48); - return __ret; -} +#define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \ + __ret; \ +}) #else -__ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { - uint8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, int8x16_t __p1) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 32);
-  return __ret;
-}
+#define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, int8x16_t __p1) {
-  int8x16x3_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 16);
-  return __ret;
-}
+#define vqrshrun_high_n_s32(__p0_194, __p1_194, __p2_194) __extension__ ({ \
+  int16x4_t __s0_194 = __p0_194; \
+  int32x4_t __s1_194 = __p1_194; \
+  int16x8_t __ret_194; \
+  __ret_194 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_194), (int16x4_t)(vqrshrun_n_s32(__s1_194, __p2_194)))); \
+  __ret_194; \
+})
 #else
-__ai uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) {
-  uint8x16x3_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqrshrun_high_n_s32(__p0_195, __p1_195, __p2_195) __extension__ ({ \
+  int16x4_t __s0_195 = __p0_195; \
+  int32x4_t __s1_195 = __p1_195; \
+  int16x4_t __rev0_195;  __rev0_195 = __builtin_shufflevector(__s0_195, __s0_195, 3, 2, 1, 0); \
+  int32x4_t __rev1_195;  __rev1_195 = __builtin_shufflevector(__s1_195, __s1_195, 3, 2, 1, 0); \
+  int16x8_t __ret_195; \
+  __ret_195 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_195), (int16x4_t)(__noswap_vqrshrun_n_s32(__rev1_195, __p2_195)))); \
+  __ret_195 = __builtin_shufflevector(__ret_195, __ret_195, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret_195; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, int8x8_t __p1) {
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 0);
-  return __ret;
-}
+#define vqrshrun_high_n_s64(__p0_196, __p1_196, __p2_196) __extension__ ({ \
+  int32x2_t __s0_196 = __p0_196; \
+  int64x2_t __s1_196 = __p1_196; \
+  int32x4_t __ret_196; \
+  __ret_196 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_196), (int32x2_t)(vqrshrun_n_s64(__s1_196, __p2_196)))); \
+  __ret_196; \
+})
 #else
-__ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, int8x8_t __p1) {
-  int8x16x3_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 0);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqrshrun_high_n_s64(__p0_197, __p1_197, __p2_197) __extension__ ({ \
+  int32x2_t __s0_197 = __p0_197; \
+  int64x2_t __s1_197 = __p1_197; \
+  int32x2_t __rev0_197;  __rev0_197 = __builtin_shufflevector(__s0_197, __s0_197, 1, 0); \
+  int64x2_t __rev1_197;  __rev1_197 = __builtin_shufflevector(__s1_197, __s1_197, 1, 0); \
+  int32x4_t __ret_197; \
+  __ret_197 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_197), (int32x2_t)(__noswap_vqrshrun_n_s64(__rev1_197, __p2_197)))); \
+  __ret_197 = __builtin_shufflevector(__ret_197, __ret_197, 3, 2, 1, 0); \
+  __ret_197; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vqtbl4_p8(poly8x16x4_t __p0, uint8x8_t __p1) {
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 4);
-  return __ret;
-}
+#define vqrshrun_high_n_s16(__p0_198, __p1_198, __p2_198) __extension__ ({ \
+  int8x8_t __s0_198 = __p0_198; \
+  int16x8_t __s1_198 = __p1_198; \
+  int8x16_t __ret_198; \
+  __ret_198 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_198), (int8x8_t)(vqrshrun_n_s16(__s1_198, __p2_198)))); \
+  __ret_198; \
+})
 #else
-__ai poly8x8_t vqtbl4_p8(poly8x16x4_t __p0, uint8x8_t __p1) {
-  poly8x16x4_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 4);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqrshrun_high_n_s16(__p0_199, __p1_199, __p2_199) __extension__ ({ \
+  int8x8_t __s0_199 = __p0_199; \
+  int16x8_t __s1_199 = __p1_199; \
+  int8x8_t __rev0_199;  __rev0_199 = __builtin_shufflevector(__s0_199, __s0_199, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1_199;  __rev1_199 = __builtin_shufflevector(__s1_199, __s1_199, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __ret_199; \
+  __ret_199 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_199), (int8x8_t)(__noswap_vqrshrun_n_s16(__rev1_199, __p2_199)))); \
+  __ret_199 = __builtin_shufflevector(__ret_199, __ret_199, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret_199; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, uint8x16_t __p1) {
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 36);
-  return __ret;
-}
+#define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, uint8x16_t __p1) {
-  poly8x16x4_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 36);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 48);
-  return __ret;
-}
+#define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) {
-  uint8x16x4_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, int8x16_t __p1) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 32);
-  return __ret;
-}
+#define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, int8x16_t __p1) {
-  int8x16x4_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 16);
+__ai uint8_t vqshlb_u8(uint8_t __p0, uint8_t __p1) {
+  uint8_t __ret;
+  __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1);
   return __ret;
 }
 #else
-__ai uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) {
-  uint8x16x4_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint8_t vqshlb_u8(uint8_t __p0, uint8_t __p1) {
+  uint8_t __ret;
+  __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, int8x8_t __p1) {
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 0);
+__ai uint32_t vqshls_u32(uint32_t __p0, uint32_t __p1) {
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1);
   return __ret;
 }
 #else
-__ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, int8x8_t __p1) {
-  int8x16x4_t __rev0;
-  __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 0);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint32_t vqshls_u32(uint32_t __p0, uint32_t __p1) {
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x16_t __p1, uint8x8_t __p2) {
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 4);
+__ai uint64_t vqshld_u64(uint64_t __p0, uint64_t __p1) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1);
   return __ret;
 }
 #else
-__ai poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x16_t __p1, uint8x8_t __p2) {
-  poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 4);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint64_t vqshld_u64(uint64_t __p0, uint64_t __p1) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly8x16_t __p1, uint8x16_t __p2) {
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 36);
+__ai uint16_t vqshlh_u16(uint16_t __p0, uint16_t __p1) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1);
   return __ret;
 }
 #else
-__ai poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly8x16_t __p1, uint8x16_t __p2) {
-  poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 36);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint16_t vqshlh_u16(uint16_t __p0, uint16_t __p1) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48);
+__ai int8_t vqshlb_s8(int8_t __p0, int8_t __p1) {
+  int8_t __ret;
+  __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, __p1);
   return __ret;
 }
 #else
-__ai uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
-  uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int8_t vqshlb_s8(int8_t __p0, int8_t __p1) {
+  int8_t __ret;
+  __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32);
+__ai int32_t vqshls_s32(int32_t __p0, int32_t __p1) {
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1);
   return __ret;
 }
 #else
-__ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) {
-  int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int32_t vqshls_s32(int32_t __p0, int32_t __p1) {
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 16);
+__ai int64_t vqshld_s64(int64_t __p0, int64_t __p1) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vqshld_s64(__p0, __p1);
   return __ret;
 }
 #else
-__ai uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) {
-  uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int64_t vqshld_s64(int64_t __p0, int64_t __p1) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vqshld_s64(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, int8x8_t __p2) {
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 0);
+__ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) {
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1);
   return __ret;
 }
 #else
-__ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, int8x8_t __p2) {
-  int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 0);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) {
+  int16_t __ret;
+  __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vqtbx2_p8(poly8x8_t __p0, poly8x16x2_t __p1, uint8x8_t __p2) {
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 4);
-  return __ret;
-}
+#define vqshlb_n_u8(__p0, __p1) __extension__ ({ \
+  uint8_t __s0 = __p0; \
+  uint8_t __ret; \
+  __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai poly8x8_t vqtbx2_p8(poly8x8_t __p0, poly8x16x2_t __p1, uint8x8_t __p2) {
-  poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16x2_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 4);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshlb_n_u8(__p0, __p1) __extension__ ({ \
+  uint8_t __s0 = __p0; \
+  uint8_t __ret; \
+  __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly8x16x2_t __p1, uint8x16_t __p2) {
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 36);
-  return __ret;
-}
+#define vqshls_n_u32(__p0, __p1) __extension__ ({ \
+  uint32_t __s0 = __p0; \
+  uint32_t __ret; \
+  __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly8x16x2_t __p1, uint8x16_t __p2) {
-  poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16x2_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 36);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshls_n_u32(__p0, __p1) __extension__ ({ \
+  uint32_t __s0 = __p0; \
+  uint32_t __ret; \
+  __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 48);
-  return __ret;
-}
+#define vqshld_n_u64(__p0, __p1) __extension__ ({ \
+  uint64_t __s0 = __p0; \
+  uint64_t __ret; \
+  __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) {
-  uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16x2_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshld_n_u64(__p0, __p1) __extension__ ({ \
+  uint64_t __s0 = __p0; \
+  uint64_t __ret; \
+  __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, int8x16_t __p2) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 32);
-  return __ret;
-}
+#define vqshlh_n_u16(__p0, __p1) __extension__ ({ \
+  uint16_t __s0 = __p0; \
+  uint16_t __ret; \
+  __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, int8x16_t __p2) {
-  int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16x2_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshlh_n_u16(__p0, __p1) __extension__ ({ \
+  uint16_t __s0 = __p0; \
+  uint16_t __ret; \
+  __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t __p2) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 16);
-  return __ret;
-}
+#define vqshlb_n_s8(__p0, __p1) __extension__ ({ \
+  int8_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \
+  __ret; \
+})
+#else
+#define vqshlb_n_s8(__p0, __p1) __extension__ ({ \
+  int8_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \
+  __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vqshls_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t __p2) {
-  uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16x2_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshls_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, int8x8_t __p2) {
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 0);
-  return __ret;
-}
+#define vqshld_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int64_t __ret; \
+  __ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, int8x8_t __p2) {
-  int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16x2_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 0);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshld_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int64_t __ret; \
+  __ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x16x3_t __p1, uint8x8_t __p2) {
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 4);
-  return __ret;
-}
+#define vqshlh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x16x3_t __p1, uint8x8_t __p2) {
-  poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16x3_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 4);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshlh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly8x16x3_t __p1, uint8x16_t __p2) {
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 36);
-  return __ret;
-}
+#define vqshlub_n_s8(__p0, __p1) __extension__ ({ \
+  int8_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly8x16x3_t __p1, uint8x16_t __p2) {
-  poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16x3_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 36);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshlub_n_s8(__p0, __p1) __extension__ ({ \
+  int8_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, uint8x16_t __p2) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 48);
-  return __ret;
-}
+#define vqshlus_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, uint8x16_t __p2) {
-  uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16x3_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshlus_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, int8x16_t __p2) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 32);
-  return __ret;
-}
+#define vqshlud_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int64_t __ret; \
+  __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, int8x16_t __p2) {
-  int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16x3_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshlud_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int64_t __ret; \
+  __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 16);
-  return __ret;
-}
+#define vqshluh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) {
-  uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16x3_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshluh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, int8x8_t __p2) {
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 0);
-  return __ret;
-}
+#define vqshrn_high_n_u32(__p0_200, __p1_200, __p2_200) __extension__ ({ \
+  uint16x4_t __s0_200 = __p0_200; \
+  uint32x4_t __s1_200 = __p1_200; \
+  uint16x8_t __ret_200; \
+  __ret_200 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_200), (uint16x4_t)(vqshrn_n_u32(__s1_200, __p2_200)))); \
+  __ret_200; \
+})
 #else
-__ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, int8x8_t __p2) {
-  int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16x3_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 0);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrn_high_n_u32(__p0_201, __p1_201, __p2_201) __extension__ ({ \
+  uint16x4_t __s0_201 = __p0_201; \
+  uint32x4_t __s1_201 = __p1_201; \
+  uint16x4_t __rev0_201;  __rev0_201 = __builtin_shufflevector(__s0_201, __s0_201, 3, 2, 1, 0); \
+  uint32x4_t __rev1_201;  __rev1_201 = __builtin_shufflevector(__s1_201, __s1_201, 3, 2, 1, 0); \
+  uint16x8_t __ret_201; \
+  __ret_201 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_201), (uint16x4_t)(__noswap_vqshrn_n_u32(__rev1_201, __p2_201)))); \
+  __ret_201 = __builtin_shufflevector(__ret_201, __ret_201, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret_201; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x16x4_t __p1, uint8x8_t __p2) {
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 4);
-  return __ret;
-}
+#define vqshrn_high_n_u64(__p0_202, __p1_202, __p2_202) __extension__ ({ \
+  uint32x2_t __s0_202 = __p0_202; \
+  uint64x2_t __s1_202 = __p1_202; \
+  uint32x4_t __ret_202; \
+  __ret_202 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_202), (uint32x2_t)(vqshrn_n_u64(__s1_202, __p2_202)))); \
+  __ret_202; \
+})
 #else
-__ai poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x16x4_t __p1, uint8x8_t __p2) {
-  poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16x4_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 4);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrn_high_n_u64(__p0_203, __p1_203, __p2_203) __extension__ ({ \
+  uint32x2_t __s0_203 = __p0_203; \
+  uint64x2_t __s1_203 = __p1_203; \
+  uint32x2_t __rev0_203;  __rev0_203 = __builtin_shufflevector(__s0_203, __s0_203, 1, 0); \
+  uint64x2_t __rev1_203;  __rev1_203 = __builtin_shufflevector(__s1_203, __s1_203, 1, 0); \
+  uint32x4_t __ret_203; \
+  __ret_203 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_203), (uint32x2_t)(__noswap_vqshrn_n_u64(__rev1_203, __p2_203)))); \
+  __ret_203 = __builtin_shufflevector(__ret_203, __ret_203, 3, 2, 1, 0); \
+  __ret_203; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly8x16x4_t __p1, uint8x16_t __p2) {
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 36);
-  return __ret;
-}
+#define vqshrn_high_n_u16(__p0_204, __p1_204, __p2_204) __extension__ ({ \
+  uint8x8_t __s0_204 = __p0_204; \
+  uint16x8_t __s1_204 = __p1_204; \
+  uint8x16_t __ret_204; \
+  __ret_204 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_204), (uint8x8_t)(vqshrn_n_u16(__s1_204, __p2_204)))); \
+  __ret_204; \
+})
 #else
-__ai poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly8x16x4_t __p1, uint8x16_t __p2) {
-  poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16x4_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 36);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrn_high_n_u16(__p0_205, __p1_205, __p2_205) __extension__ ({ \
+  uint8x8_t __s0_205 = __p0_205; \
+  uint16x8_t __s1_205 = __p1_205; \
+  uint8x8_t __rev0_205;  __rev0_205 = __builtin_shufflevector(__s0_205, __s0_205, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint16x8_t __rev1_205;  __rev1_205 = __builtin_shufflevector(__s1_205, __s1_205, 7, 6, 5, 4, 3, 2, 1, 0); \
+  uint8x16_t __ret_205; \
+  __ret_205 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_205), (uint8x8_t)(__noswap_vqshrn_n_u16(__rev1_205, __p2_205)))); \
+  __ret_205 = __builtin_shufflevector(__ret_205, __ret_205, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret_205; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 48);
-  return __ret;
-}
+#define vqshrn_high_n_s32(__p0_206, __p1_206, __p2_206) __extension__ ({ \
+  int16x4_t __s0_206 = __p0_206; \
+  int32x4_t __s1_206 = __p1_206; \
+  int16x8_t __ret_206; \
+  __ret_206 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_206), (int16x4_t)(vqshrn_n_s32(__s1_206, __p2_206)))); \
+  __ret_206; \
+})
 #else
-__ai uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) {
-  uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16x4_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrn_high_n_s32(__p0_207, __p1_207, __p2_207) __extension__ ({ \
+  int16x4_t __s0_207 = __p0_207; \
+  int32x4_t __s1_207 = __p1_207; \
+  int16x4_t __rev0_207;  __rev0_207 = __builtin_shufflevector(__s0_207, __s0_207, 3, 2, 1, 0); \
+  int32x4_t __rev1_207;  __rev1_207 = __builtin_shufflevector(__s1_207, __s1_207, 3, 2, 1, 0); \
+  int16x8_t __ret_207; \
+  __ret_207 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_207), (int16x4_t)(__noswap_vqshrn_n_s32(__rev1_207, __p2_207)))); \
+  __ret_207 = __builtin_shufflevector(__ret_207, __ret_207, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret_207; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, int8x16_t __p2) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 32);
-  return __ret;
-}
+#define vqshrn_high_n_s64(__p0_208, __p1_208, __p2_208) __extension__ ({ \
+  int32x2_t __s0_208 = __p0_208; \
+  int64x2_t __s1_208 = __p1_208; \
+  int32x4_t __ret_208; \
+  __ret_208 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_208), (int32x2_t)(vqshrn_n_s64(__s1_208, __p2_208)))); \
+  __ret_208; \
+})
 #else
-__ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, int8x16_t __p2) {
-  int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16x4_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrn_high_n_s64(__p0_209, __p1_209, __p2_209) __extension__ ({ \
+  int32x2_t __s0_209 = __p0_209; \
+  int64x2_t __s1_209 = __p1_209; \
+  int32x2_t __rev0_209;  __rev0_209 = __builtin_shufflevector(__s0_209, __s0_209, 1, 0); \
+  int64x2_t __rev1_209;  __rev1_209 = __builtin_shufflevector(__s1_209, __s1_209, 1, 0); \
+  int32x4_t __ret_209; \
+  __ret_209 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_209), (int32x2_t)(__noswap_vqshrn_n_s64(__rev1_209, __p2_209)))); \
+  __ret_209 = __builtin_shufflevector(__ret_209, __ret_209, 3, 2, 1, 0); \
+  __ret_209; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 16);
-  return __ret;
-}
+#define vqshrn_high_n_s16(__p0_210, __p1_210, __p2_210) __extension__ ({ \
+  int8x8_t __s0_210 = __p0_210; \
+  int16x8_t __s1_210 = __p1_210; \
+  int8x16_t __ret_210; \
+  __ret_210 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_210), (int8x8_t)(vqshrn_n_s16(__s1_210, __p2_210)))); \
+  __ret_210; \
+})
 #else
-__ai uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) {
-  uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16x4_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrn_high_n_s16(__p0_211, __p1_211, __p2_211) __extension__ ({ \
+  int8x8_t __s0_211 = __p0_211; \
+  int16x8_t __s1_211 = __p1_211; \
+  int8x8_t __rev0_211;  __rev0_211 = __builtin_shufflevector(__s0_211, __s0_211, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1_211;  __rev1_211 = __builtin_shufflevector(__s1_211, __s1_211, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __ret_211; \
+  __ret_211 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_211), (int8x8_t)(__noswap_vqshrn_n_s16(__rev1_211, __p2_211)))); \
+  __ret_211 = __builtin_shufflevector(__ret_211, __ret_211, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret_211; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, int8x8_t __p2) {
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 0);
-  return __ret;
-}
+#define vqshrns_n_u32(__p0, __p1) __extension__ ({ \
+  uint32_t __s0 = __p0; \
+  uint16_t __ret; \
+  __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, int8x8_t __p2) {
-  int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16x4_t __rev1;
-  __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 0);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrns_n_u32(__p0, __p1) __extension__ ({ \
+  uint32_t __s0 = __p0; \
+  uint16_t __ret; \
+  __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint16x8_t vraddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
-  uint16x8_t __ret;
-  __ret = vcombine_u16(__p0, vraddhn_u32(__p1, __p2));
-  return __ret;
-}
+#define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \
+  uint64_t __s0 = __p0; \
+  uint32_t __ret; \
+  __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint16x8_t vraddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
-  uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  uint32x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  uint16x8_t __ret;
-  __ret = __noswap_vcombine_u16(__rev0, __noswap_vraddhn_u32(__rev1, __rev2));
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \
+  uint64_t __s0 = __p0; \
+  uint32_t __ret; \
+  __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vraddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
-  uint32x4_t __ret;
-  __ret = vcombine_u32(__p0, vraddhn_u64(__p1, __p2));
-  return __ret;
-}
+#define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \
+  uint16_t __s0 = __p0; \
+  uint8_t __ret; \
+  __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint32x4_t vraddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
-  uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  uint64x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
-  uint32x4_t __ret;
-  __ret = __noswap_vcombine_u32(__rev0, __noswap_vraddhn_u64(__rev1, __rev2));
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \
+  uint16_t __s0 = __p0; \
+  uint8_t __ret; \
+  __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vraddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
-  uint8x16_t __ret;
-  __ret = vcombine_u8(__p0, vraddhn_u16(__p1, __p2));
-  return __ret;
-}
+#define vqshrns_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint8x16_t vraddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
-  uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint16x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = __noswap_vcombine_u8(__rev0, __noswap_vraddhn_u16(__rev1, __rev2));
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrns_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int16x8_t vraddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
-  int16x8_t __ret;
-  __ret = vcombine_s16(__p0, vraddhn_s32(__p1, __p2));
-  return __ret;
-}
+#define vqshrnd_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int16x8_t vraddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
-  int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
-  int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
-  int32x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
-  int16x8_t __ret;
-  __ret = __noswap_vcombine_s16(__rev0, __noswap_vraddhn_s32(__rev1, __rev2));
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrnd_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int32x4_t vraddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
-  int32x4_t __ret;
-  __ret = vcombine_s32(__p0, vraddhn_s64(__p1, __p2));
-  return __ret;
-}
+#define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int32x4_t vraddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
-  int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  int64x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
-  int32x4_t __ret;
-  __ret = __noswap_vcombine_s32(__rev0, __noswap_vraddhn_s64(__rev1, __rev2));
-  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vraddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
-  int8x16_t __ret;
-  __ret = vcombine_s8(__p0, vraddhn_s16(__p1, __p2));
-  return __ret;
-}
+#define vqshrun_high_n_s32(__p0_212, __p1_212, __p2_212) __extension__ ({ \
+  int16x4_t __s0_212 = __p0_212; \
+  int32x4_t __s1_212 = __p1_212; \
+  int16x8_t __ret_212; \
+  __ret_212 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_212), (int16x4_t)(vqshrun_n_s32(__s1_212, __p2_212)))); \
+  __ret_212; \
+})
 #else
-__ai int8x16_t vraddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
-  int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
-  int16x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = __noswap_vcombine_s8(__rev0, __noswap_vraddhn_s16(__rev1, __rev2));
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrun_high_n_s32(__p0_213, __p1_213, __p2_213) __extension__ ({ \
+  int16x4_t __s0_213 = __p0_213; \
+  int32x4_t __s1_213 = __p1_213; \
+  int16x4_t __rev0_213;  __rev0_213 = __builtin_shufflevector(__s0_213, __s0_213, 3, 2, 1, 0); \
+  int32x4_t __rev1_213;  __rev1_213 = __builtin_shufflevector(__s1_213, __s1_213, 3, 2, 1, 0); \
+  int16x8_t __ret_213; \
+  __ret_213 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_213), (int16x4_t)(__noswap_vqshrun_n_s32(__rev1_213, __p2_213)))); \
+  __ret_213 = __builtin_shufflevector(__ret_213, __ret_213, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret_213; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vrbit_p8(poly8x8_t __p0) {
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 4);
-  return __ret;
-}
+#define vqshrun_high_n_s64(__p0_214, __p1_214, __p2_214) __extension__ ({ \
+  int32x2_t __s0_214 = __p0_214; \
+  int64x2_t __s1_214 = __p1_214; \
+  int32x4_t __ret_214; \
+  __ret_214 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_214), (int32x2_t)(vqshrun_n_s64(__s1_214, __p2_214)))); \
+  __ret_214; \
+})
 #else
-__ai poly8x8_t vrbit_p8(poly8x8_t __p0) {
-  poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x8_t __ret;
-  __ret = (poly8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 4);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrun_high_n_s64(__p0_215, __p1_215, __p2_215) __extension__ ({ \
+  int32x2_t __s0_215 = __p0_215; \
+  int64x2_t __s1_215 = __p1_215; \
+  int32x2_t __rev0_215;  __rev0_215 = __builtin_shufflevector(__s0_215, __s0_215, 1, 0); \
+  int64x2_t __rev1_215;  __rev1_215 = __builtin_shufflevector(__s1_215, __s1_215, 1, 0); \
+  int32x4_t __ret_215; \
+  __ret_215 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_215), (int32x2_t)(__noswap_vqshrun_n_s64(__rev1_215, __p2_215)))); \
+  __ret_215 = __builtin_shufflevector(__ret_215, __ret_215, 3, 2, 1, 0); \
+  __ret_215; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai poly8x16_t vrbitq_p8(poly8x16_t __p0) {
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 36);
-  return __ret;
-}
+#define vqshrun_high_n_s16(__p0_216, __p1_216, __p2_216) __extension__ ({ \
+  int8x8_t __s0_216 = __p0_216; \
+  int16x8_t __s1_216 = __p1_216; \
+  int8x16_t __ret_216; \
+  __ret_216 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_216), (int8x8_t)(vqshrun_n_s16(__s1_216, __p2_216)))); \
+  __ret_216; \
+})
 #else
-__ai poly8x16_t vrbitq_p8(poly8x16_t __p0) {
-  poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  poly8x16_t __ret;
-  __ret = (poly8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 36);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrun_high_n_s16(__p0_217, __p1_217, __p2_217) __extension__ ({ \
+  int8x8_t __s0_217 = __p0_217; \
+  int16x8_t __s1_217 = __p1_217; \
+  int8x8_t __rev0_217;  __rev0_217 = __builtin_shufflevector(__s0_217, __s0_217, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int16x8_t __rev1_217;  __rev1_217 = __builtin_shufflevector(__s1_217, __s1_217, 7, 6, 5, 4, 3, 2, 1, 0); \
+  int8x16_t __ret_217; \
+  __ret_217 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_217), (int8x8_t)(__noswap_vqshrun_n_s16(__rev1_217, __p2_217)))); \
+  __ret_217 = __builtin_shufflevector(__ret_217, __ret_217, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+  __ret_217; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x16_t vrbitq_u8(uint8x16_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 48);
-  return __ret;
-}
+#define vqshruns_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqshruns_n_s32(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint8x16_t vrbitq_u8(uint8x16_t __p0) {
-  uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x16_t __ret;
-  __ret = (uint8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 48);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshruns_n_s32(__p0, __p1) __extension__ ({ \
+  int32_t __s0 = __p0; \
+  int16_t __ret; \
+  __ret = (int16_t) __builtin_neon_vqshruns_n_s32(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x16_t vrbitq_s8(int8x16_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 32);
-  return __ret;
-}
+#define vqshrund_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqshrund_n_s64(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai int8x16_t vrbitq_s8(int8x16_t __p0) {
-  int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x16_t __ret;
-  __ret = (int8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 32);
-  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrund_n_s64(__p0, __p1) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int32_t __ret; \
+  __ret = (int32_t) __builtin_neon_vqshrund_n_s64(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vrbit_u8(uint8x8_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 16);
-  return __ret;
-}
+#define vqshrunh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqshrunh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #else
-__ai uint8x8_t vrbit_u8(uint8x8_t __p0) {
-  uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  uint8x8_t __ret;
-  __ret = (uint8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 16);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
-  return __ret;
-}
+#define vqshrunh_n_s16(__p0, __p1) __extension__ ({ \
+  int16_t __s0 = __p0; \
+  int8_t __ret; \
+  __ret = (int8_t) __builtin_neon_vqshrunh_n_s16(__s0, __p1); \
+  __ret; \
+})
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vrbit_s8(int8x8_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 0);
+__ai uint8_t vqsubb_u8(uint8_t __p0, uint8_t __p1) {
+  uint8_t __ret;
+  __ret = (uint8_t) __builtin_neon_vqsubb_u8(__p0, __p1);
   return __ret;
 }
 #else
-__ai int8x8_t vrbit_s8(int8x8_t __p0) {
-  int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
-  int8x8_t __ret;
-  __ret = (int8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 0);
-  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+__ai uint8_t vqsubb_u8(uint8_t __p0, uint8_t __p1) {
+  uint8_t __ret;
+  __ret = (uint8_t) __builtin_neon_vqsubb_u8(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai float64x2_t vrecpeq_f64(float64x2_t __p0) {
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 42);
+__ai uint32_t vqsubs_u32(uint32_t __p0, uint32_t __p1) {
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vqsubs_u32(__p0, __p1);
   return __ret;
 }
 #else
-__ai float64x2_t vrecpeq_f64(float64x2_t __p0) {
-  float64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 42);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+__ai uint32_t vqsubs_u32(uint32_t __p0, uint32_t __p1) {
+  uint32_t __ret;
+  __ret = (uint32_t) __builtin_neon_vqsubs_u32(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai float64x1_t vrecpe_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 10);
+__ai uint64_t vqsubd_u64(uint64_t __p0, uint64_t __p1) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vqsubd_u64(__p0, __p1);
   return __ret;
 }
 #else
-__ai float64x1_t vrecpe_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 10);
+__ai uint64_t vqsubd_u64(uint64_t __p0, uint64_t __p1) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vqsubd_u64(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai float64_t vrecped_f64(float64_t __p0) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vrecped_f64(__p0);
+__ai uint16_t vqsubh_u16(uint16_t __p0, uint16_t __p1) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vqsubh_u16(__p0, __p1);
   return __ret;
 }
 #else
-__ai float64_t vrecped_f64(float64_t __p0) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vrecped_f64(__p0);
+__ai uint16_t vqsubh_u16(uint16_t __p0, uint16_t __p1) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vqsubh_u16(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai float32_t vrecpes_f32(float32_t __p0) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vrecpes_f32(__p0);
+__ai int8_t vqsubb_s8(int8_t __p0, int8_t __p1) {
+  int8_t __ret;
+  __ret = (int8_t) __builtin_neon_vqsubb_s8(__p0, __p1);
   return __ret;
 }
 #else
-__ai float32_t vrecpes_f32(float32_t __p0) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vrecpes_f32(__p0);
+__ai int8_t vqsubb_s8(int8_t __p0, int8_t __p1) {
+  int8_t __ret;
+  __ret = (int8_t) __builtin_neon_vqsubb_s8(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
+__ai int32_t vqsubs_s32(int32_t __p0, int32_t __p1) {
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1);
   return __ret;
 }
 #else
-__ai float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) {
-  float64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
-  float64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
-  float64x2_t __ret;
-  __ret = (float64x2_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
-  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float64x1_t vrecps_f64(float64x1_t __p0, float64x1_t __p1) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
+__ai int32_t vqsubs_s32(int32_t __p0, int32_t __p1) {
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1);
   return __ret;
 }
-#else
-__ai float64x1_t vrecps_f64(float64x1_t __p0, float64x1_t __p1) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
+__ai int32_t __noswap_vqsubs_s32(int32_t __p0, int32_t __p1) {
+  int32_t __ret;
+  __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai float64_t vrecpsd_f64(float64_t __p0, float64_t __p1) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vrecpsd_f64(__p0, __p1);
+__ai int64_t vqsubd_s64(int64_t __p0, int64_t __p1) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vqsubd_s64(__p0, __p1);
   return __ret;
 }
 #else
-__ai float64_t vrecpsd_f64(float64_t __p0, float64_t __p1) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vrecpsd_f64(__p0, __p1);
+__ai int64_t vqsubd_s64(int64_t __p0, int64_t __p1) {
+  int64_t __ret;
+  __ret = (int64_t) __builtin_neon_vqsubd_s64(__p0, __p1);
   return __ret;
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
-__ai float32_t vrecpss_f32(float32_t __p0, float32_t __p1) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vrecpss_f32(__p0, __p1);
+__ai int16_t
vqsubh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); return __ret; } #else -__ai float32_t vrecpss_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpss_f32(__p0, __p1); +__ai int16_t vqsubh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); + return __ret; +} +__ai int16_t __noswap_vqsubh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64_t vrecpxd_f64(float64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrecpxd_f64(__p0); +__ai poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 4); return __ret; } #else -__ai float64_t vrecpxd_f64(float64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrecpxd_f64(__p0); +__ai poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 4); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vrecpxs_f32(float32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpxs_f32(__p0); +__ai poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint8x16_t __p1) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } #else -__ai float32_t vrecpxs_f32(float32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpxs_f32(__p0); +__ai poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint8x16_t __p1) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vrshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); +__ai uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else -__ai uint64_t vrshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); +__ai uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } 
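The scalar `vqsubb`/`vqsubh`/`vqsubs`/`vqsubd` definitions added just above wrap AArch64's saturating subtract for single lanes: results clamp at the type's bounds instead of wrapping. A small sketch with hypothetical operands:

```c
#include <arm_neon.h>
#include <assert.h>

int main(void) {
  /* Unsigned saturating subtract: 10 - 200 clamps to 0 instead of wrapping. */
  assert(vqsubb_u8(10, 200) == 0);
  /* Signed saturating subtract: -100 - 100 clamps to INT8_MIN (-128). */
  assert(vqsubb_s8(-100, 100) == -128);
  return 0;
}
```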
#endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vrshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vrshld_s64(__p0, __p1); +__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else -__ai int64_t vrshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vrshld_s64(__p0, __p1); +__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, int8x16_t __p1) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vrshrd_n_u64(__s0, __p1); \ - __ret; \ -}) -#else -#define vrshrd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vrshrd_n_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vrshrd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vrshrd_n_s64(__s0, __p1); \ - __ret; \ -}) -#else -#define vrshrd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vrshrd_n_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u32(__p0_218, __p1_218, __p2_218) __extension__ ({ \ - uint16x4_t __s0_218 = __p0_218; \ - uint32x4_t __s1_218 = __p1_218; \ - uint16x8_t __ret_218; \ - __ret_218 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_218), (uint16x4_t)(vrshrn_n_u32(__s1_218, __p2_218)))); \ - __ret_218; \ -}) -#else -#define vrshrn_high_n_u32(__p0_219, __p1_219, __p2_219) __extension__ ({ \ - uint16x4_t __s0_219 = __p0_219; \ - uint32x4_t __s1_219 = __p1_219; \ - uint16x4_t __rev0_219; __rev0_219 = __builtin_shufflevector(__s0_219, __s0_219, 3, 2, 1, 0); \ - uint32x4_t __rev1_219; __rev1_219 = __builtin_shufflevector(__s1_219, __s1_219, 3, 2, 1, 0); \ - uint16x8_t __ret_219; \ - __ret_219 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_219), (uint16x4_t)(__noswap_vrshrn_n_u32(__rev1_219, __p2_219)))); \ - __ret_219 = __builtin_shufflevector(__ret_219, __ret_219, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_219; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u64(__p0_220, __p1_220, __p2_220) __extension__ ({ \ - uint32x2_t __s0_220 = __p0_220; \ - uint64x2_t __s1_220 = __p1_220; \ - uint32x4_t __ret_220; \ - __ret_220 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_220), (uint32x2_t)(vrshrn_n_u64(__s1_220, __p2_220)))); \ - __ret_220; \ -}) -#else -#define vrshrn_high_n_u64(__p0_221, __p1_221, __p2_221) __extension__ ({ \ - uint32x2_t __s0_221 = __p0_221; \ - uint64x2_t __s1_221 = __p1_221; \ - uint32x2_t __rev0_221; __rev0_221 = __builtin_shufflevector(__s0_221, __s0_221, 1, 0); \ - uint64x2_t __rev1_221; __rev1_221 = __builtin_shufflevector(__s1_221, __s1_221, 1, 0); \ - uint32x4_t __ret_221; \ - __ret_221 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_221), 
(uint32x2_t)(__noswap_vrshrn_n_u64(__rev1_221, __p2_221)))); \ - __ret_221 = __builtin_shufflevector(__ret_221, __ret_221, 3, 2, 1, 0); \ - __ret_221; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u16(__p0_222, __p1_222, __p2_222) __extension__ ({ \ - uint8x8_t __s0_222 = __p0_222; \ - uint16x8_t __s1_222 = __p1_222; \ - uint8x16_t __ret_222; \ - __ret_222 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_222), (uint8x8_t)(vrshrn_n_u16(__s1_222, __p2_222)))); \ - __ret_222; \ -}) +__ai uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 16); + return __ret; +} #else -#define vrshrn_high_n_u16(__p0_223, __p1_223, __p2_223) __extension__ ({ \ - uint8x8_t __s0_223 = __p0_223; \ - uint16x8_t __s1_223 = __p1_223; \ - uint8x8_t __rev0_223; __rev0_223 = __builtin_shufflevector(__s0_223, __s0_223, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_223; __rev1_223 = __builtin_shufflevector(__s1_223, __s1_223, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_223; \ - __ret_223 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_223), (uint8x8_t)(__noswap_vrshrn_n_u16(__rev1_223, __p2_223)))); \ - __ret_223 = __builtin_shufflevector(__ret_223, __ret_223, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_223; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s32(__p0_224, __p1_224, __p2_224) __extension__ ({ \ - int16x4_t __s0_224 = __p0_224; \ - int32x4_t __s1_224 = __p1_224; \ - int16x8_t __ret_224; \ - __ret_224 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_224), (int16x4_t)(vrshrn_n_s32(__s1_224, __p2_224)))); \ - __ret_224; \ -}) +__ai uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int8x8_t vqtbl1_s8(int8x16_t __p0, int8x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 0); + return __ret; +} #else -#define vrshrn_high_n_s32(__p0_225, __p1_225, __p2_225) __extension__ ({ \ - int16x4_t __s0_225 = __p0_225; \ - int32x4_t __s1_225 = __p1_225; \ - int16x4_t __rev0_225; __rev0_225 = __builtin_shufflevector(__s0_225, __s0_225, 3, 2, 1, 0); \ - int32x4_t __rev1_225; __rev1_225 = __builtin_shufflevector(__s1_225, __s1_225, 3, 2, 1, 0); \ - int16x8_t __ret_225; \ - __ret_225 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_225), (int16x4_t)(__noswap_vrshrn_n_s32(__rev1_225, __p2_225)))); \ - __ret_225 = __builtin_shufflevector(__ret_225, __ret_225, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_225; \ -}) +__ai int8x8_t vqtbl1_s8(int8x16_t __p0, int8x8_t __p1) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s64(__p0_226, __p1_226, __p2_226) __extension__ ({ \ - 
int32x2_t __s0_226 = __p0_226; \ - int64x2_t __s1_226 = __p1_226; \ - int32x4_t __ret_226; \ - __ret_226 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_226), (int32x2_t)(vrshrn_n_s64(__s1_226, __p2_226)))); \ - __ret_226; \ -}) +__ai poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint8x8_t __p1) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 4); + return __ret; +} #else -#define vrshrn_high_n_s64(__p0_227, __p1_227, __p2_227) __extension__ ({ \ - int32x2_t __s0_227 = __p0_227; \ - int64x2_t __s1_227 = __p1_227; \ - int32x2_t __rev0_227; __rev0_227 = __builtin_shufflevector(__s0_227, __s0_227, 1, 0); \ - int64x2_t __rev1_227; __rev1_227 = __builtin_shufflevector(__s1_227, __s1_227, 1, 0); \ - int32x4_t __ret_227; \ - __ret_227 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_227), (int32x2_t)(__noswap_vrshrn_n_s64(__rev1_227, __p2_227)))); \ - __ret_227 = __builtin_shufflevector(__ret_227, __ret_227, 3, 2, 1, 0); \ - __ret_227; \ -}) +__ai poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint8x8_t __p1) { + poly8x16x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 4); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s16(__p0_228, __p1_228, __p2_228) __extension__ ({ \ - int8x8_t __s0_228 = __p0_228; \ - int16x8_t __s1_228 = __p1_228; \ - int8x16_t __ret_228; \ - __ret_228 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_228), (int8x8_t)(vrshrn_n_s16(__s1_228, __p2_228)))); \ - __ret_228; \ -}) +__ai poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, uint8x16_t __p1) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 36); + return __ret; +} #else -#define vrshrn_high_n_s16(__p0_229, __p1_229, __p2_229) __extension__ ({ \ - int8x8_t __s0_229 = __p0_229; \ - int16x8_t __s1_229 = __p1_229; \ - int8x8_t __rev0_229; __rev0_229 = __builtin_shufflevector(__s0_229, __s0_229, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_229; __rev1_229 = __builtin_shufflevector(__s1_229, __s1_229, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_229; \ - __ret_229 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_229), (int8x8_t)(__noswap_vrshrn_n_s16(__rev1_229, __p2_229)))); \ - __ret_229 = __builtin_shufflevector(__ret_229, __ret_229, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_229; \ -}) +__ai poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, uint8x16_t __p1) { + poly8x16x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 
12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrsqrteq_f64(float64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 42); +__ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 48); return __ret; } #else -__ai float64x2_t vrsqrteq_f64(float64x2_t __p0) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { + uint8x16x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrsqrte_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 10); +__ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, int8x16_t __p1) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 32); return __ret; } #else -__ai float64x1_t vrsqrte_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 10); +__ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, int8x16_t __p1) { + int8x16x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64_t vrsqrted_f64(float64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrsqrted_f64(__p0); +__ai uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 16); return __ret; } #else -__ai float64_t vrsqrted_f64(float64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrsqrted_f64(__p0); +__ai uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { + uint8x16x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vrsqrtes_f32(float32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrsqrtes_f32(__p0); +__ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, int8x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 0); return __ret; } #else -__ai float32_t vrsqrtes_f32(float32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrsqrtes_f32(__p0); +__ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, int8x8_t __p1) { + int8x16x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); +__ai poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, uint8x8_t __p1) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 4); return __ret; } #else -__ai float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, uint8x8_t __p1) { + poly8x16x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 4); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vrsqrts_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 10); +__ai poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, uint8x16_t __p1) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 36); return __ret; } #else -__ai float64x1_t 
vrsqrts_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 10); +__ai poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, uint8x16_t __p1) { + poly8x16x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64_t vrsqrtsd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrsqrtsd_f64(__p0, __p1); +__ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 48); return __ret; } #else -__ai float64_t vrsqrtsd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrsqrtsd_f64(__p0, __p1); +__ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { + uint8x16x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrsqrtss_f32(__p0, __p1); +__ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, int8x16_t __p1) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 32); return __ret; } #else -__ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrsqrtss_f32(__p0, __p1); +__ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, int8x16_t __p1) { + int8x16x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vrsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 16); + return __ret; +} #else -#define vrsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) { + uint8x16x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vrsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, int8x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 0); + return __ret; +} #else -#define vrsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, int8x8_t __p1) { + int8x16x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vrsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { - uint16x8_t __ret; - __ret = vcombine_u16(__p0, vrsubhn_u32(__p1, __p2)); +__ai poly8x8_t 
vqtbl4_p8(poly8x16x4_t __p0, uint8x8_t __p1) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 4); return __ret; } #else -__ai uint16x8_t vrsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = __noswap_vcombine_u16(__rev0, __noswap_vrsubhn_u32(__rev1, __rev2)); +__ai poly8x8_t vqtbl4_p8(poly8x16x4_t __p0, uint8x8_t __p1) { + poly8x16x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vrsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { - uint32x4_t __ret; - __ret = vcombine_u32(__p0, vrsubhn_u64(__p1, __p2)); +__ai poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, uint8x16_t __p1) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 36); return __ret; } #else -__ai uint32x4_t vrsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - uint32x4_t __ret; - __ret = __noswap_vcombine_u32(__rev0, __noswap_vrsubhn_u64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, uint8x16_t __p1) { + poly8x16x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); 
return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vrsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { +__ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = vcombine_u8(__p0, vrsubhn_u16(__p1, __p2)); + __ret = (uint8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 48); return __ret; } #else -__ai uint8x16_t vrsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) { + uint8x16x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __ret; - __ret = __noswap_vcombine_u8(__rev0, __noswap_vrsubhn_u16(__rev1, __rev2)); + __ret = (uint8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vrsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { - int16x8_t __ret; - __ret = vcombine_s16(__p0, vrsubhn_s32(__p1, __p2)); +__ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, int8x16_t __p1) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 32); return __ret; } #else -__ai int16x8_t vrsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - int16x8_t __ret; - __ret = __noswap_vcombine_s16(__rev0, __noswap_vrsubhn_s32(__rev1, __rev2)); +__ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, int8x16_t __p1) { + int8x16x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) 
__builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 16); + return __ret; +} +#else +__ai uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) { + uint8x16x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vrsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { - int32x4_t __ret; - __ret = vcombine_s32(__p0, vrsubhn_s64(__p1, __p2)); +__ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, int8x8_t __p1) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 0); return __ret; } #else -__ai int32x4_t vrsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - int32x4_t __ret; - __ret = __noswap_vcombine_s32(__rev0, __noswap_vrsubhn_s64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, int8x8_t __p1) { + int8x16x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { - int8x16_t __ret; - __ret = vcombine_s8(__p0, 
vrsubhn_s16(__p1, __p2)); +__ai poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x16_t __p1, uint8x8_t __p2) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 4); return __ret; } #else -__ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __ret; - __ret = __noswap_vcombine_s8(__rev0, __noswap_vrsubhn_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +__ai poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x16_t __p1, uint8x8_t __p2) { + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 4); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) +__ai poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly8x16_t __p1, uint8x16_t __p2) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 36); + return __ret; +} #else -#define vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#define __noswap_vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) +__ai poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly8x16_t __p1, uint8x16_t __p2) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) +__ai uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); + return __ret; +} #else 
-#define vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) +__ai uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) +__ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); + return __ret; +} #else -#define vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) -#define __noswap_vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__s1, __p2); \ - __ret; \ -}) +__ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) +__ai uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, 
(int8x16_t)__p1, (int8x8_t)__p2, 16); + return __ret; +} #else -#define vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#define __noswap_vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) +__ai uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); +__ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, int8x8_t __p2) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 0); return __ret; } #else -__ai uint64_t vshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); +__ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, int8x8_t __p2) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vshld_s64(__p0, __p1); +__ai poly8x8_t vqtbx2_p8(poly8x8_t __p0, poly8x16x2_t __p1, uint8x8_t __p2) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 4); return __ret; } #else -__ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vshld_s64(__p0, __p1); +__ai poly8x8_t vqtbx2_p8(poly8x8_t __p0, poly8x16x2_t __p1, uint8x8_t __p2) { + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 4); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } 
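The `vqtbx*` intrinsics defined in this stretch are the "extended" counterparts of the `vqtbl*` table lookups added earlier in the hunk: an out-of-range index selects the corresponding lane of the extra destination operand rather than producing zero. A sketch contrasting the two, with hypothetical helper data (AArch64 assumed; the function name is illustrative):

```c
#include <arm_neon.h>

/* Hypothetical demo, not part of the header. */
void table_lookup_demo(void) {
  uint8x16_t table    = vdupq_n_u8(0xAA);
  uint8x8_t  fallback = vdup_n_u8(0xFF);
  /* Lane values 0, 1, 99, 0, ...; 99 is out of range for a 16-entry table. */
  uint8x8_t  idx = vcreate_u8(0x0000000000630100ULL);
  uint8x8_t  a = vqtbl1_u8(table, idx);           /* out-of-range lane -> 0x00 */
  uint8x8_t  b = vqtbx1_u8(fallback, table, idx); /* out-of-range lane -> 0xFF */
  (void)a; (void)b;
}
```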
#endif #ifdef __LITTLE_ENDIAN__ -#define vshld_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vshld_n_u64(__s0, __p1); \ - __ret; \ -}) +__ai poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly8x16x2_t __p1, uint8x16_t __p2) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 36); + return __ret; +} #else -#define vshld_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vshld_n_u64(__s0, __p1); \ - __ret; \ -}) +__ai poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly8x16x2_t __p1, uint8x16_t __p2) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshld_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vshld_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 48); + return __ret; +} #else -#define vshld_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vshld_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u8(__p0_230, __p1_230) __extension__ ({ \ - uint8x16_t __s0_230 = __p0_230; \ - uint16x8_t __ret_230; \ - __ret_230 = (uint16x8_t)(vshll_n_u8(vget_high_u8(__s0_230), __p1_230)); \ - __ret_230; \ -}) +__ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, int8x16_t __p2) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 32); + return __ret; +} 
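Every big-endian (`#else`) body in this hunk follows the same mechanical pattern: reverse the lane order of each vector operand with `__builtin_shufflevector`, invoke the same builtin, then reverse the result back, so the builtin always sees little-endian lane numbering. A distilled, illustrative sketch of that pattern (the helper names are hypothetical):

```c
#include <arm_neon.h>

static int8x8_t rev_lanes_s8(int8x8_t v) {
  return __builtin_shufflevector(v, v, 7, 6, 5, 4, 3, 2, 1, 0);
}

int8x8_t big_endian_wrapper_sketch(int8x8_t p0) {
  int8x8_t rev = rev_lanes_s8(p0); /* present lanes in little-endian order */
  int8x8_t ret = vabs_s8(rev);     /* stand-in for any NEON builtin        */
  return rev_lanes_s8(ret);        /* restore the caller's lane order      */
}
```

For a purely lane-wise operation the two reversals cancel out; the header applies the pattern uniformly so that lane-mixing builtins also see consistent numbering.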
#else -#define vshll_high_n_u8(__p0_231, __p1_231) __extension__ ({ \ - uint8x16_t __s0_231 = __p0_231; \ - uint8x16_t __rev0_231; __rev0_231 = __builtin_shufflevector(__s0_231, __s0_231, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_231; \ - __ret_231 = (uint16x8_t)(__noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_231), __p1_231)); \ - __ret_231 = __builtin_shufflevector(__ret_231, __ret_231, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_231; \ -}) +__ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, int8x16_t __p2) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u32(__p0_232, __p1_232) __extension__ ({ \ - uint32x4_t __s0_232 = __p0_232; \ - uint64x2_t __ret_232; \ - __ret_232 = (uint64x2_t)(vshll_n_u32(vget_high_u32(__s0_232), __p1_232)); \ - __ret_232; \ -}) +__ai uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t __p2) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 16); + return __ret; +} #else -#define vshll_high_n_u32(__p0_233, __p1_233) __extension__ ({ \ - uint32x4_t __s0_233 = __p0_233; \ - uint32x4_t __rev0_233; __rev0_233 = __builtin_shufflevector(__s0_233, __s0_233, 3, 2, 1, 0); \ - uint64x2_t __ret_233; \ - __ret_233 = (uint64x2_t)(__noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_233), __p1_233)); \ - __ret_233 = __builtin_shufflevector(__ret_233, __ret_233, 1, 0); \ - __ret_233; \ -}) +__ai uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t __p2) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u16(__p0_234, __p1_234) __extension__ ({ \ - uint16x8_t __s0_234 = __p0_234; \ - uint32x4_t __ret_234; \ - __ret_234 = (uint32x4_t)(vshll_n_u16(vget_high_u16(__s0_234), __p1_234)); \ - __ret_234; \ -}) +__ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, int8x8_t __p2) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 0); + return __ret; +} #else -#define 
vshll_high_n_u16(__p0_235, __p1_235) __extension__ ({ \ - uint16x8_t __s0_235 = __p0_235; \ - uint16x8_t __rev0_235; __rev0_235 = __builtin_shufflevector(__s0_235, __s0_235, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_235; \ - __ret_235 = (uint32x4_t)(__noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_235), __p1_235)); \ - __ret_235 = __builtin_shufflevector(__ret_235, __ret_235, 3, 2, 1, 0); \ - __ret_235; \ -}) +__ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, int8x8_t __p2) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s8(__p0_236, __p1_236) __extension__ ({ \ - int8x16_t __s0_236 = __p0_236; \ - int16x8_t __ret_236; \ - __ret_236 = (int16x8_t)(vshll_n_s8(vget_high_s8(__s0_236), __p1_236)); \ - __ret_236; \ -}) +__ai poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x16x3_t __p1, uint8x8_t __p2) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 4); + return __ret; +} #else -#define vshll_high_n_s8(__p0_237, __p1_237) __extension__ ({ \ - int8x16_t __s0_237 = __p0_237; \ - int8x16_t __rev0_237; __rev0_237 = __builtin_shufflevector(__s0_237, __s0_237, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_237; \ - __ret_237 = (int16x8_t)(__noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_237), __p1_237)); \ - __ret_237 = __builtin_shufflevector(__ret_237, __ret_237, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_237; \ -}) +__ai poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x16x3_t __p1, uint8x8_t __p2) { + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 4); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s32(__p0_238, __p1_238) __extension__ ({ \ - int32x4_t __s0_238 = __p0_238; \ - int64x2_t __ret_238; \ - __ret_238 = (int64x2_t)(vshll_n_s32(vget_high_s32(__s0_238), __p1_238)); \ - __ret_238; \ -}) +__ai poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly8x16x3_t __p1, uint8x16_t __p2) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], 
(int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 36); + return __ret; +} #else -#define vshll_high_n_s32(__p0_239, __p1_239) __extension__ ({ \ - int32x4_t __s0_239 = __p0_239; \ - int32x4_t __rev0_239; __rev0_239 = __builtin_shufflevector(__s0_239, __s0_239, 3, 2, 1, 0); \ - int64x2_t __ret_239; \ - __ret_239 = (int64x2_t)(__noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_239), __p1_239)); \ - __ret_239 = __builtin_shufflevector(__ret_239, __ret_239, 1, 0); \ - __ret_239; \ -}) +__ai poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly8x16x3_t __p1, uint8x16_t __p2) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s16(__p0_240, __p1_240) __extension__ ({ \ - int16x8_t __s0_240 = __p0_240; \ - int32x4_t __ret_240; \ - __ret_240 = (int32x4_t)(vshll_n_s16(vget_high_s16(__s0_240), __p1_240)); \ - __ret_240; \ -}) +__ai uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, uint8x16_t __p2) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 48); + return __ret; +} #else -#define vshll_high_n_s16(__p0_241, __p1_241) __extension__ ({ \ - int16x8_t __s0_241 = __p0_241; \ - int16x8_t __rev0_241; __rev0_241 = __builtin_shufflevector(__s0_241, __s0_241, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_241; \ - __ret_241 = (int32x4_t)(__noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_241), __p1_241)); \ - __ret_241 = __builtin_shufflevector(__ret_241, __ret_241, 3, 2, 1, 0); \ - __ret_241; \ -}) +__ai uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, uint8x16_t __p2) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ 
-#define vshrd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ - __ret; \ -}) +__ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, int8x16_t __p2) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 32); + return __ret; +} #else -#define vshrd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ - __ret; \ -}) +__ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, int8x16_t __p2) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshrd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 16); + return __ret; +} #else -#define vshrd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ - __ret; \ -}) +__ai uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u32(__p0_242, __p1_242, __p2_242) __extension__ ({ \ - uint16x4_t __s0_242 = __p0_242; \ - uint32x4_t __s1_242 = __p1_242; \ - uint16x8_t __ret_242; \ - __ret_242 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_242), (uint16x4_t)(vshrn_n_u32(__s1_242, __p2_242)))); \ 
- __ret_242; \ -}) +__ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, int8x8_t __p2) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 0); + return __ret; +} #else -#define vshrn_high_n_u32(__p0_243, __p1_243, __p2_243) __extension__ ({ \ - uint16x4_t __s0_243 = __p0_243; \ - uint32x4_t __s1_243 = __p1_243; \ - uint16x4_t __rev0_243; __rev0_243 = __builtin_shufflevector(__s0_243, __s0_243, 3, 2, 1, 0); \ - uint32x4_t __rev1_243; __rev1_243 = __builtin_shufflevector(__s1_243, __s1_243, 3, 2, 1, 0); \ - uint16x8_t __ret_243; \ - __ret_243 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_243), (uint16x4_t)(__noswap_vshrn_n_u32(__rev1_243, __p2_243)))); \ - __ret_243 = __builtin_shufflevector(__ret_243, __ret_243, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_243; \ -}) +__ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, int8x8_t __p2) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u64(__p0_244, __p1_244, __p2_244) __extension__ ({ \ - uint32x2_t __s0_244 = __p0_244; \ - uint64x2_t __s1_244 = __p1_244; \ - uint32x4_t __ret_244; \ - __ret_244 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_244), (uint32x2_t)(vshrn_n_u64(__s1_244, __p2_244)))); \ - __ret_244; \ -}) +__ai poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x16x4_t __p1, uint8x8_t __p2) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 4); + return __ret; +} #else -#define vshrn_high_n_u64(__p0_245, __p1_245, __p2_245) __extension__ ({ \ - uint32x2_t __s0_245 = __p0_245; \ - uint64x2_t __s1_245 = __p1_245; \ - uint32x2_t __rev0_245; __rev0_245 = __builtin_shufflevector(__s0_245, __s0_245, 1, 0); \ - uint64x2_t __rev1_245; __rev1_245 = __builtin_shufflevector(__s1_245, __s1_245, 1, 0); \ - uint32x4_t __ret_245; \ - __ret_245 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_245), (uint32x2_t)(__noswap_vshrn_n_u64(__rev1_245, __p2_245)))); \ - __ret_245 = __builtin_shufflevector(__ret_245, __ret_245, 3, 2, 1, 0); \ - __ret_245; \ -}) +__ai poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x16x4_t __p1, uint8x8_t __p2) { + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16x4_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 4); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u16(__p0_246, __p1_246, __p2_246) __extension__ ({ \ - uint8x8_t __s0_246 = __p0_246; \ - uint16x8_t __s1_246 = __p1_246; \ - uint8x16_t __ret_246; \ - __ret_246 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_246), (uint8x8_t)(vshrn_n_u16(__s1_246, __p2_246)))); \ - __ret_246; \ -}) +__ai poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly8x16x4_t __p1, uint8x16_t __p2) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 36); + return __ret; +} #else -#define vshrn_high_n_u16(__p0_247, __p1_247, __p2_247) __extension__ ({ \ - uint8x8_t __s0_247 = __p0_247; \ - uint16x8_t __s1_247 = __p1_247; \ - uint8x8_t __rev0_247; __rev0_247 = __builtin_shufflevector(__s0_247, __s0_247, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_247; __rev1_247 = __builtin_shufflevector(__s1_247, __s1_247, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_247; \ - __ret_247 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_247), (uint8x8_t)(__noswap_vshrn_n_u16(__rev1_247, __p2_247)))); \ - __ret_247 = __builtin_shufflevector(__ret_247, __ret_247, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_247; \ -}) +__ai poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly8x16x4_t __p1, uint8x16_t __p2) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16x4_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) { + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 48); + return __ret; +} +#else +__ai uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) { + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 
13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16x4_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = (uint8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 48); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s32(__p0_248, __p1_248, __p2_248) __extension__ ({ \ - int16x4_t __s0_248 = __p0_248; \ - int32x4_t __s1_248 = __p1_248; \ - int16x8_t __ret_248; \ - __ret_248 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_248), (int16x4_t)(vshrn_n_s32(__s1_248, __p2_248)))); \ - __ret_248; \ -}) +__ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, int8x16_t __p2) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 32); + return __ret; +} #else -#define vshrn_high_n_s32(__p0_249, __p1_249, __p2_249) __extension__ ({ \ - int16x4_t __s0_249 = __p0_249; \ - int32x4_t __s1_249 = __p1_249; \ - int16x4_t __rev0_249; __rev0_249 = __builtin_shufflevector(__s0_249, __s0_249, 3, 2, 1, 0); \ - int32x4_t __rev1_249; __rev1_249 = __builtin_shufflevector(__s1_249, __s1_249, 3, 2, 1, 0); \ - int16x8_t __ret_249; \ - __ret_249 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_249), (int16x4_t)(__noswap_vshrn_n_s32(__rev1_249, __p2_249)))); \ - __ret_249 = __builtin_shufflevector(__ret_249, __ret_249, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_249; \ -}) +__ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, int8x16_t __p2) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16x4_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s64(__p0_250, __p1_250, __p2_250) __extension__ ({ \ - int32x2_t 
__s0_250 = __p0_250; \ - int64x2_t __s1_250 = __p1_250; \ - int32x4_t __ret_250; \ - __ret_250 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_250), (int32x2_t)(vshrn_n_s64(__s1_250, __p2_250)))); \ - __ret_250; \ -}) +__ai uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 16); + return __ret; +} #else -#define vshrn_high_n_s64(__p0_251, __p1_251, __p2_251) __extension__ ({ \ - int32x2_t __s0_251 = __p0_251; \ - int64x2_t __s1_251 = __p1_251; \ - int32x2_t __rev0_251; __rev0_251 = __builtin_shufflevector(__s0_251, __s0_251, 1, 0); \ - int64x2_t __rev1_251; __rev1_251 = __builtin_shufflevector(__s1_251, __s1_251, 1, 0); \ - int32x4_t __ret_251; \ - __ret_251 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_251), (int32x2_t)(__noswap_vshrn_n_s64(__rev1_251, __p2_251)))); \ - __ret_251 = __builtin_shufflevector(__ret_251, __ret_251, 3, 2, 1, 0); \ - __ret_251; \ -}) +__ai uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16x4_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s16(__p0_252, __p1_252, __p2_252) __extension__ ({ \ - int8x8_t __s0_252 = __p0_252; \ - int16x8_t __s1_252 = __p1_252; \ - int8x16_t __ret_252; \ - __ret_252 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_252), (int8x8_t)(vshrn_n_s16(__s1_252, __p2_252)))); \ - __ret_252; \ -}) +__ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, int8x8_t __p2) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 0); + return __ret; +} #else -#define vshrn_high_n_s16(__p0_253, __p1_253, __p2_253) __extension__ ({ \ - int8x8_t __s0_253 = __p0_253; \ - int16x8_t __s1_253 = __p1_253; \ - int8x8_t __rev0_253; __rev0_253 = __builtin_shufflevector(__s0_253, __s0_253, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_253; __rev1_253 = __builtin_shufflevector(__s1_253, __s1_253, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_253; \ - __ret_253 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_253), (int8x8_t)(__noswap_vshrn_n_s16(__rev1_253, __p2_253)))); \ - __ret_253 = __builtin_shufflevector(__ret_253, __ret_253, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_253; \ -}) +__ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, int8x8_t __p2) { + int8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16x4_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 0); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vslid_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vslid_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai uint16x8_t vraddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { + uint16x8_t __ret; + __ret = vcombine_u16(__p0, vraddhn_u32(__p1, __p2)); + return __ret; +} #else -#define vslid_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vslid_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai uint16x8_t vraddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = __noswap_vcombine_u16(__rev0, __noswap_vraddhn_u32(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vslid_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vslid_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai uint32x4_t vraddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { + uint32x4_t __ret; + __ret = vcombine_u32(__p0, vraddhn_u64(__p1, __p2)); + return __ret; +} #else -#define vslid_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vslid_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai uint32x4_t vraddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x4_t __ret; + __ret = __noswap_vcombine_u32(__rev0, __noswap_vraddhn_u64(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vsli_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) +__ai uint8x16_t 
vraddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { + uint8x16_t __ret; + __ret = vcombine_u8(__p0, vraddhn_u16(__p1, __p2)); + return __ret; +} #else -#define vsli_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) +__ai uint8x16_t vraddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = __noswap_vcombine_u8(__rev0, __noswap_vraddhn_u16(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vsliq_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ - __ret; \ -}) +__ai int16x8_t vraddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { + int16x8_t __ret; + __ret = vcombine_s16(__p0, vraddhn_s32(__p1, __p2)); + return __ret; +} #else -#define vsliq_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ - __ret; \ -}) +__ai int16x8_t vraddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x8_t __ret; + __ret = __noswap_vcombine_s16(__rev0, __noswap_vraddhn_s32(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8_t vsqaddb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); +__ai int32x4_t vraddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { + int32x4_t __ret; + __ret = vcombine_s32(__p0, vraddhn_s64(__p1, __p2)); return __ret; } #else -__ai uint8_t vsqaddb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); +__ai int32x4_t vraddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vcombine_s32(__rev0, __noswap_vraddhn_s64(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vsqadds_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); +__ai int8x16_t 
vraddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { + int8x16_t __ret; + __ret = vcombine_s8(__p0, vraddhn_s16(__p1, __p2)); return __ret; } #else -__ai uint32_t vsqadds_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); +__ai int8x16_t vraddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = __noswap_vcombine_s8(__rev0, __noswap_vraddhn_s16(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vsqaddd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1); +__ai poly8x8_t vrbit_p8(poly8x8_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 4); return __ret; } #else -__ai uint64_t vsqaddd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1); +__ai poly8x8_t vrbit_p8(poly8x8_t __p0) { + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __ret; + __ret = (poly8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 4); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vsqaddh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1); +__ai poly8x16_t vrbitq_p8(poly8x16_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 36); return __ret; } #else -__ai uint16_t vsqaddh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1); +__ai poly8x16_t vrbitq_p8(poly8x16_t __p0) { + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __ret; + __ret = (poly8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 36); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { +__ai uint8x16_t vrbitq_u8(uint8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = (uint8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 48); return __ret; } #else -__ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { +__ai uint8x16_t vrbitq_u8(uint8x16_t __p0) { uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); + __ret = (uint8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { 
- uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); +__ai int8x16_t vrbitq_s8(int8x16_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 32); return __ret; } #else -__ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai int8x16_t vrbitq_s8(int8x16_t __p0) { + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = (int8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 32); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); +__ai uint8x8_t vrbit_u8(uint8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 16); return __ret; } #else -__ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai uint8x8_t vrbit_u8(uint8x8_t __p0) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __ret; + __ret = (uint8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 16); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); +__ai int8x8_t vrbit_s8(int8x8_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 0); return __ret; } #else -__ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); +__ai int8x8_t vrbit_s8(int8x8_t __p0) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __ret; + __ret = (int8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); +__ai float64x2_t vrecpeq_f64(float64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 42); return __ret; } #else -__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, 
__p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); +__ai float64x2_t vrecpeq_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); +__ai float64x1_t vrecpe_f64(float64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 10); return __ret; } #else -__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float64x1_t vrecpe_f64(float64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 10); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); +__ai float64_t vrecped_f64(float64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrecped_f64(__p0); return __ret; } #else -__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); +__ai float64_t vrecped_f64(float64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrecped_f64(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); +__ai float32_t vrecpes_f32(float32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrecpes_f32(__p0); return __ret; } #else -__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float32_t vrecpes_f32(float32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrecpes_f32(__p0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vsqrtq_f64(float64x2_t __p0) { +__ai float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 42); + __ret = (float64x2_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else -__ai float64x2_t vsqrtq_f64(float64x2_t __p0) { +__ai float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t 
__rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 42); + __ret = (float64x2_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vsqrtq_f32(float32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 41); +__ai float64x1_t vrecps_f64(float64x1_t __p0, float64x1_t __p1) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } #else -__ai float32x4_t vsqrtq_f32(float32x4_t __p0) { - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); +__ai float64x1_t vrecps_f64(float64x1_t __p0, float64x1_t __p1) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vsqrt_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 10); +__ai float64_t vrecpsd_f64(float64_t __p0, float64_t __p1) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrecpsd_f64(__p0, __p1); return __ret; } #else -__ai float64x1_t vsqrt_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 10); +__ai float64_t vrecpsd_f64(float64_t __p0, float64_t __p1) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrecpsd_f64(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vsqrt_f32(float32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 9); +__ai float32_t vrecpss_f32(float32_t __p0, float32_t __p1) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrecpss_f32(__p0, __p1); return __ret; } #else -__ai float32x2_t vsqrt_f32(float32x2_t __p0) { - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vsqrt_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); +__ai float32_t vrecpss_f32(float32_t __p0, float32_t __p1) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrecpss_f32(__p0, __p1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vsrad_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai float64_t vrecpxd_f64(float64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrecpxd_f64(__p0); + return __ret; +} #else -#define vsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vsrad_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai float64_t vrecpxd_f64(float64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrecpxd_f64(__p0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32_t vrecpxs_f32(float32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrecpxs_f32(__p0); + return __ret; +} +#else +__ai float32_t vrecpxs_f32(float32_t __p0) { + float32_t __ret; + __ret = 
(float32_t) __builtin_neon_vrecpxs_f32(__p0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint64_t vrshld_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); + return __ret; +} +#else +__ai uint64_t vrshld_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vsrad_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai int64_t vrshld_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vrshld_s64(__p0, __p1); + return __ret; +} #else -#define vsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vsrad_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) +__ai int64_t vrshld_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vrshld_s64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vsrid_n_u64(__p0, __p1, __p2) __extension__ ({ \ +#define vrshrd_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vsrid_n_u64(__s0, __s1, __p2); \ + __ret = (uint64_t) __builtin_neon_vrshrd_n_u64(__s0, __p1); \ __ret; \ }) #else -#define vsrid_n_u64(__p0, __p1, __p2) __extension__ ({ \ +#define vrshrd_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vsrid_n_u64(__s0, __s1, __p2); \ + __ret = (uint64_t) __builtin_neon_vrshrd_n_u64(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsrid_n_s64(__p0, __p1, __p2) __extension__ ({ \ +#define vrshrd_n_s64(__p0, __p1) __extension__ ({ \ int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vsrid_n_s64(__s0, __s1, __p2); \ + __ret = (int64_t) __builtin_neon_vrshrd_n_s64(__s0, __p1); \ __ret; \ }) #else -#define vsrid_n_s64(__p0, __p1, __p2) __extension__ ({ \ +#define vrshrd_n_s64(__p0, __p1) __extension__ ({ \ int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vsrid_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vsri_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) -#else -#define vsri_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vsriq_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ - __ret; \ -}) -#else -#define vsriq_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s0 = __p0; \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); 
\ - poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = (int64_t) __builtin_neon_vrshrd_n_s64(__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 6); \ -}) -#else -#define vst1_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 6); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vst1q_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 38); \ -}) -#else -#define vst1q_p64(__p0, __p1) __extension__ ({ \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 38); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vst1q_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 42); \ +#define vrshrn_high_n_u32(__p0_218, __p1_218, __p2_218) __extension__ ({ \ + uint16x4_t __s0_218 = __p0_218; \ + uint32x4_t __s1_218 = __p1_218; \ + uint16x8_t __ret_218; \ + __ret_218 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_218), (uint16x4_t)(vrshrn_n_u32(__s1_218, __p2_218)))); \ + __ret_218; \ }) #else -#define vst1q_f64(__p0, __p1) __extension__ ({ \ - float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 42); \ +#define vrshrn_high_n_u32(__p0_219, __p1_219, __p2_219) __extension__ ({ \ + uint16x4_t __s0_219 = __p0_219; \ + uint32x4_t __s1_219 = __p1_219; \ + uint16x4_t __rev0_219; __rev0_219 = __builtin_shufflevector(__s0_219, __s0_219, 3, 2, 1, 0); \ + uint32x4_t __rev1_219; __rev1_219 = __builtin_shufflevector(__s1_219, __s1_219, 3, 2, 1, 0); \ + uint16x8_t __ret_219; \ + __ret_219 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_219), (uint16x4_t)(__noswap_vrshrn_n_u32(__rev1_219, __p2_219)))); \ + __ret_219 = __builtin_shufflevector(__ret_219, __ret_219, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_219; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 10); \ +#define vrshrn_high_n_u64(__p0_220, __p1_220, __p2_220) __extension__ ({ \ + uint32x2_t __s0_220 = __p0_220; \ + uint64x2_t __s1_220 = __p1_220; \ + uint32x4_t __ret_220; \ + __ret_220 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_220), (uint32x2_t)(vrshrn_n_u64(__s1_220, __p2_220)))); \ + __ret_220; \ }) #else -#define vst1_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 10); \ +#define vrshrn_high_n_u64(__p0_221, __p1_221, __p2_221) __extension__ ({ \ + uint32x2_t __s0_221 = __p0_221; \ + uint64x2_t __s1_221 = __p1_221; \ + uint32x2_t __rev0_221; __rev0_221 = __builtin_shufflevector(__s0_221, __s0_221, 1, 0); \ + uint64x2_t __rev1_221; __rev1_221 = __builtin_shufflevector(__s1_221, __s1_221, 1, 0); \ + uint32x4_t __ret_221; \ + __ret_221 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_221), (uint32x2_t)(__noswap_vrshrn_n_u64(__rev1_221, __p2_221)))); \ + __ret_221 = __builtin_shufflevector(__ret_221, __ret_221, 3, 2, 1, 0); \ + __ret_221; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - 
poly64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ +#define vrshrn_high_n_u16(__p0_222, __p1_222, __p2_222) __extension__ ({ \ + uint8x8_t __s0_222 = __p0_222; \ + uint16x8_t __s1_222 = __p1_222; \ + uint8x16_t __ret_222; \ + __ret_222 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_222), (uint8x8_t)(vrshrn_n_u16(__s1_222, __p2_222)))); \ + __ret_222; \ }) #else -#define vst1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ +#define vrshrn_high_n_u16(__p0_223, __p1_223, __p2_223) __extension__ ({ \ + uint8x8_t __s0_223 = __p0_223; \ + uint16x8_t __s1_223 = __p1_223; \ + uint8x8_t __rev0_223; __rev0_223 = __builtin_shufflevector(__s0_223, __s0_223, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_223; __rev1_223 = __builtin_shufflevector(__s1_223, __s1_223, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_223; \ + __ret_223 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_223), (uint8x8_t)(__noswap_vrshrn_n_u16(__rev1_223, __p2_223)))); \ + __ret_223 = __builtin_shufflevector(__ret_223, __ret_223, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_223; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 38); \ +#define vrshrn_high_n_s32(__p0_224, __p1_224, __p2_224) __extension__ ({ \ + int16x4_t __s0_224 = __p0_224; \ + int32x4_t __s1_224 = __p1_224; \ + int16x8_t __ret_224; \ + __ret_224 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_224), (int16x4_t)(vrshrn_n_s32(__s1_224, __p2_224)))); \ + __ret_224; \ }) #else -#define vst1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 38); \ +#define vrshrn_high_n_s32(__p0_225, __p1_225, __p2_225) __extension__ ({ \ + int16x4_t __s0_225 = __p0_225; \ + int32x4_t __s1_225 = __p1_225; \ + int16x4_t __rev0_225; __rev0_225 = __builtin_shufflevector(__s0_225, __s0_225, 3, 2, 1, 0); \ + int32x4_t __rev1_225; __rev1_225 = __builtin_shufflevector(__s1_225, __s1_225, 3, 2, 1, 0); \ + int16x8_t __ret_225; \ + __ret_225 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_225), (int16x4_t)(__noswap_vrshrn_n_s32(__rev1_225, __p2_225)))); \ + __ret_225 = __builtin_shufflevector(__ret_225, __ret_225, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_225; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 42); \ +#define vrshrn_high_n_s64(__p0_226, __p1_226, __p2_226) __extension__ ({ \ + int32x2_t __s0_226 = __p0_226; \ + int64x2_t __s1_226 = __p1_226; \ + int32x4_t __ret_226; \ + __ret_226 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_226), (int32x2_t)(vrshrn_n_s64(__s1_226, __p2_226)))); \ + __ret_226; \ }) #else -#define vst1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 42); \ +#define vrshrn_high_n_s64(__p0_227, __p1_227, __p2_227) __extension__ ({ \ + int32x2_t __s0_227 = __p0_227; \ + int64x2_t __s1_227 = __p1_227; \ + int32x2_t __rev0_227; __rev0_227 = __builtin_shufflevector(__s0_227, __s0_227, 1, 0); \ + int64x2_t __rev1_227; __rev1_227 = 
__builtin_shufflevector(__s1_227, __s1_227, 1, 0); \ + int32x4_t __ret_227; \ + __ret_227 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_227), (int32x2_t)(__noswap_vrshrn_n_s64(__rev1_227, __p2_227)))); \ + __ret_227 = __builtin_shufflevector(__ret_227, __ret_227, 3, 2, 1, 0); \ + __ret_227; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ +#define vrshrn_high_n_s16(__p0_228, __p1_228, __p2_228) __extension__ ({ \ + int8x8_t __s0_228 = __p0_228; \ + int16x8_t __s1_228 = __p1_228; \ + int8x16_t __ret_228; \ + __ret_228 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_228), (int8x8_t)(vrshrn_n_s16(__s1_228, __p2_228)))); \ + __ret_228; \ }) #else -#define vst1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ +#define vrshrn_high_n_s16(__p0_229, __p1_229, __p2_229) __extension__ ({ \ + int8x8_t __s0_229 = __p0_229; \ + int16x8_t __s1_229 = __p1_229; \ + int8x8_t __rev0_229; __rev0_229 = __builtin_shufflevector(__s0_229, __s0_229, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_229; __rev1_229 = __builtin_shufflevector(__s1_229, __s1_229, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_229; \ + __ret_229 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_229), (int8x8_t)(__noswap_vrshrn_n_s16(__rev1_229, __p2_229)))); \ + __ret_229 = __builtin_shufflevector(__ret_229, __ret_229, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_229; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_p8_x2(__p0, __p1) __extension__ ({ \ - poly8x8x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 4); \ -}) +__ai float64x2_t vrsqrteq_f64(float64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 42); + return __ret; +} #else -#define vst1_p8_x2(__p0, __p1) __extension__ ({ \ - poly8x8x2_t __s1 = __p1; \ - poly8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 4); \ -}) +__ai float64x2_t vrsqrteq_f64(float64x2_t __p0) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_p64_x2(__p0, __p1) __extension__ ({ \ - poly64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ -}) +__ai float64x1_t vrsqrte_f64(float64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 10); + return __ret; +} #else -#define vst1_p64_x2(__p0, __p1) __extension__ ({ \ - poly64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ -}) +__ai float64x1_t vrsqrte_f64(float64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 10); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_p16_x2(__p0, __p1) __extension__ ({ \ - poly16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 5); \ -}) 
+__ai float64_t vrsqrted_f64(float64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrsqrted_f64(__p0); + return __ret; +} #else -#define vst1_p16_x2(__p0, __p1) __extension__ ({ \ - poly16x4x2_t __s1 = __p1; \ - poly16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 5); \ -}) +__ai float64_t vrsqrted_f64(float64_t __p0) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrsqrted_f64(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_p8_x2(__p0, __p1) __extension__ ({ \ - poly8x16x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 36); \ -}) +__ai float32_t vrsqrtes_f32(float32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrsqrtes_f32(__p0); + return __ret; +} #else -#define vst1q_p8_x2(__p0, __p1) __extension__ ({ \ - poly8x16x2_t __s1 = __p1; \ - poly8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 36); \ -}) +__ai float32_t vrsqrtes_f32(float32_t __p0) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrsqrtes_f32(__p0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_p64_x2(__p0, __p1) __extension__ ({ \ - poly64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 38); \ -}) +__ai float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + return __ret; +} #else -#define vst1q_p64_x2(__p0, __p1) __extension__ ({ \ - poly64x2x2_t __s1 = __p1; \ - poly64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 38); \ -}) +__ai float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_p16_x2(__p0, __p1) __extension__ ({ \ - poly16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 37); \ -}) +__ai float64x1_t vrsqrts_f64(float64x1_t __p0, float64x1_t __p1) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + return __ret; +} #else -#define vst1q_p16_x2(__p0, __p1) __extension__ ({ \ - poly16x8x2_t __s1 = __p1; \ - poly16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], 
(int8x16_t)__rev1.val[1], 37); \ -}) +__ai float64x1_t vrsqrts_f64(float64x1_t __p0, float64x1_t __p1) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u8_x2(__p0, __p1) __extension__ ({ \ - uint8x16x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 48); \ -}) +__ai float64_t vrsqrtsd_f64(float64_t __p0, float64_t __p1) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrsqrtsd_f64(__p0, __p1); + return __ret; +} #else -#define vst1q_u8_x2(__p0, __p1) __extension__ ({ \ - uint8x16x2_t __s1 = __p1; \ - uint8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 48); \ -}) +__ai float64_t vrsqrtsd_f64(float64_t __p0, float64_t __p1) { + float64_t __ret; + __ret = (float64_t) __builtin_neon_vrsqrtsd_f64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u32_x2(__p0, __p1) __extension__ ({ \ - uint32x4x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 50); \ -}) +__ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrsqrtss_f32(__p0, __p1); + return __ret; +} #else -#define vst1q_u32_x2(__p0, __p1) __extension__ ({ \ - uint32x4x2_t __s1 = __p1; \ - uint32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 50); \ -}) +__ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { + float32_t __ret; + __ret = (float32_t) __builtin_neon_vrsqrtss_f32(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u64_x2(__p0, __p1) __extension__ ({ \ - uint64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 51); \ +#define vrsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __s1 = __p1; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2); \ + __ret; \ }) #else -#define vst1q_u64_x2(__p0, __p1) __extension__ ({ \ - uint64x2x2_t __s1 = __p1; \ - uint64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 51); \ +#define vrsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __s1 = __p1; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u16_x2(__p0, __p1) __extension__ ({ \ - uint16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 49); \ +#define vrsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __s1 = __p1; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2); \ + __ret; \ }) #else 
-#define vst1q_u16_x2(__p0, __p1) __extension__ ({ \ - uint16x8x2_t __s1 = __p1; \ - uint16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 49); \ +#define vrsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __s1 = __p1; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s8_x2(__p0, __p1) __extension__ ({ \ - int8x16x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 32); \ -}) +__ai uint16x8_t vrsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { + uint16x8_t __ret; + __ret = vcombine_u16(__p0, vrsubhn_u32(__p1, __p2)); + return __ret; +} #else -#define vst1q_s8_x2(__p0, __p1) __extension__ ({ \ - int8x16x2_t __s1 = __p1; \ - int8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 32); \ -}) +__ai uint16x8_t vrsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x8_t __ret; + __ret = __noswap_vcombine_u16(__rev0, __noswap_vrsubhn_u32(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_f64_x2(__p0, __p1) __extension__ ({ \ - float64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 42); \ -}) +__ai uint32x4_t vrsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { + uint32x4_t __ret; + __ret = vcombine_u32(__p0, vrsubhn_u64(__p1, __p2)); + return __ret; +} #else -#define vst1q_f64_x2(__p0, __p1) __extension__ ({ \ - float64x2x2_t __s1 = __p1; \ - float64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 42); \ -}) +__ai uint32x4_t vrsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x4_t __ret; + __ret = __noswap_vcombine_u32(__rev0, __noswap_vrsubhn_u64(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ - float32x4x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 41); \ -}) +__ai uint8x16_t vrsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { + uint8x16_t __ret; + __ret = vcombine_u8(__p0, vrsubhn_u16(__p1, __p2)); + return __ret; +} #else -#define 
vst1q_f32_x2(__p0, __p1) __extension__ ({ \ - float32x4x2_t __s1 = __p1; \ - float32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 41); \ -}) +__ai uint8x16_t vrsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __ret; + __ret = __noswap_vcombine_u8(__rev0, __noswap_vrsubhn_u16(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ - float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 40); \ -}) +__ai int16x8_t vrsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { + int16x8_t __ret; + __ret = vcombine_s16(__p0, vrsubhn_s32(__p1, __p2)); + return __ret; +} #else -#define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ - float16x8x2_t __s1 = __p1; \ - float16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 40); \ -}) +__ai int16x8_t vrsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x8_t __ret; + __ret = __noswap_vcombine_s16(__rev0, __noswap_vrsubhn_s32(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ - int32x4x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 34); \ -}) +__ai int32x4_t vrsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { + int32x4_t __ret; + __ret = vcombine_s32(__p0, vrsubhn_s64(__p1, __p2)); + return __ret; +} #else -#define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ - int32x4x2_t __s1 = __p1; \ - int32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 34); \ -}) +__ai int32x4_t vrsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x4_t __ret; + __ret = __noswap_vcombine_s32(__rev0, __noswap_vrsubhn_s64(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ - int64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 35); \ -}) +__ai 
int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { + int8x16_t __ret; + __ret = vcombine_s8(__p0, vrsubhn_s16(__p1, __p2)); + return __ret; +} #else -#define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ - int64x2x2_t __s1 = __p1; \ - int64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 35); \ -}) +__ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __ret; + __ret = __noswap_vcombine_s8(__rev0, __noswap_vrsubhn_s16(__rev1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ - int16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 33); \ +#define vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64_t __s0 = __p0; \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ }) #else -#define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ - int16x8x2_t __s1 = __p1; \ - int16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 33); \ +#define vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64_t __s0 = __p0; \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) +#define __noswap_vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64_t __s0 = __p0; \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_u8_x2(__p0, __p1) __extension__ ({ \ - uint8x8x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 16); \ +#define vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64_t __s0 = __p0; \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ }) #else -#define vst1_u8_x2(__p0, __p1) __extension__ ({ \ - uint8x8x2_t __s1 = __p1; \ - uint8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 16); \ +#define vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64_t __s0 = __p0; \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define 
__noswap_vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64_t __s0 = __p0; \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_u32_x2(__p0, __p1) __extension__ ({ \ - uint32x2x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 18); \ +#define vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ }) #else -#define vst1_u32_x2(__p0, __p1) __extension__ ({ \ - uint32x2x2_t __s1 = __p1; \ - uint32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 18); \ +#define vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__rev1, __p2); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#define __noswap_vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __ret; \ + __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__s1, __p2); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_u64_x2(__p0, __p1) __extension__ ({ \ - uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ +#define vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ }) #else -#define vst1_u64_x2(__p0, __p1) __extension__ ({ \ - uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ +#define vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ +}) +#define __noswap_vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ + float64_t __s0 = __p0; \ + float64x1_t __s1 = __p1; \ + float64x1_t __ret; \ + __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_u16_x2(__p0, __p1) __extension__ ({ \ - uint16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 17); \ -}) +__ai uint64_t vshld_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); + return __ret; +} #else -#define vst1_u16_x2(__p0, __p1) __extension__ ({ \ - uint16x4x2_t __s1 = __p1; \ - uint16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 17); \ -}) +__ai uint64_t vshld_u64(uint64_t __p0, uint64_t __p1) { + uint64_t 
__ret; + __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_s8_x2(__p0, __p1) __extension__ ({ \ - int8x8x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 0); \ -}) +__ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vshld_s64(__p0, __p1); + return __ret; +} #else -#define vst1_s8_x2(__p0, __p1) __extension__ ({ \ - int8x8x2_t __s1 = __p1; \ - int8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 0); \ -}) +__ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vshld_s64(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_f64_x2(__p0, __p1) __extension__ ({ \ - float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 10); \ +#define vshld_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vshld_n_u64(__s0, __p1); \ + __ret; \ }) #else -#define vst1_f64_x2(__p0, __p1) __extension__ ({ \ - float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 10); \ +#define vshld_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vshld_n_u64(__s0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_f32_x2(__p0, __p1) __extension__ ({ \ - float32x2x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 9); \ +#define vshld_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vshld_n_s64(__s0, __p1); \ + __ret; \ }) #else -#define vst1_f32_x2(__p0, __p1) __extension__ ({ \ - float32x2x2_t __s1 = __p1; \ - float32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 9); \ +#define vshld_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vshld_n_s64(__s0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_f16_x2(__p0, __p1) __extension__ ({ \ - float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 8); \ +#define vshll_high_n_u8(__p0_230, __p1_230) __extension__ ({ \ + uint8x16_t __s0_230 = __p0_230; \ + uint16x8_t __ret_230; \ + __ret_230 = (uint16x8_t)(vshll_n_u8(vget_high_u8(__s0_230), __p1_230)); \ + __ret_230; \ }) #else -#define vst1_f16_x2(__p0, __p1) __extension__ ({ \ - float16x4x2_t __s1 = __p1; \ - float16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 8); \ +#define vshll_high_n_u8(__p0_231, __p1_231) __extension__ ({ \ + uint8x16_t __s0_231 = __p0_231; \ + uint8x16_t __rev0_231; __rev0_231 = __builtin_shufflevector(__s0_231, __s0_231, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_231; \ + __ret_231 = 
(uint16x8_t)(__noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_231), __p1_231)); \ + __ret_231 = __builtin_shufflevector(__ret_231, __ret_231, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_231; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_s32_x2(__p0, __p1) __extension__ ({ \ - int32x2x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 2); \ +#define vshll_high_n_u32(__p0_232, __p1_232) __extension__ ({ \ + uint32x4_t __s0_232 = __p0_232; \ + uint64x2_t __ret_232; \ + __ret_232 = (uint64x2_t)(vshll_n_u32(vget_high_u32(__s0_232), __p1_232)); \ + __ret_232; \ }) #else -#define vst1_s32_x2(__p0, __p1) __extension__ ({ \ - int32x2x2_t __s1 = __p1; \ - int32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 2); \ +#define vshll_high_n_u32(__p0_233, __p1_233) __extension__ ({ \ + uint32x4_t __s0_233 = __p0_233; \ + uint32x4_t __rev0_233; __rev0_233 = __builtin_shufflevector(__s0_233, __s0_233, 3, 2, 1, 0); \ + uint64x2_t __ret_233; \ + __ret_233 = (uint64x2_t)(__noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_233), __p1_233)); \ + __ret_233 = __builtin_shufflevector(__ret_233, __ret_233, 1, 0); \ + __ret_233; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_s64_x2(__p0, __p1) __extension__ ({ \ - int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 3); \ +#define vshll_high_n_u16(__p0_234, __p1_234) __extension__ ({ \ + uint16x8_t __s0_234 = __p0_234; \ + uint32x4_t __ret_234; \ + __ret_234 = (uint32x4_t)(vshll_n_u16(vget_high_u16(__s0_234), __p1_234)); \ + __ret_234; \ }) #else -#define vst1_s64_x2(__p0, __p1) __extension__ ({ \ - int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 3); \ +#define vshll_high_n_u16(__p0_235, __p1_235) __extension__ ({ \ + uint16x8_t __s0_235 = __p0_235; \ + uint16x8_t __rev0_235; __rev0_235 = __builtin_shufflevector(__s0_235, __s0_235, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_235; \ + __ret_235 = (uint32x4_t)(__noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_235), __p1_235)); \ + __ret_235 = __builtin_shufflevector(__ret_235, __ret_235, 3, 2, 1, 0); \ + __ret_235; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_s16_x2(__p0, __p1) __extension__ ({ \ - int16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 1); \ +#define vshll_high_n_s8(__p0_236, __p1_236) __extension__ ({ \ + int8x16_t __s0_236 = __p0_236; \ + int16x8_t __ret_236; \ + __ret_236 = (int16x8_t)(vshll_n_s8(vget_high_s8(__s0_236), __p1_236)); \ + __ret_236; \ }) #else -#define vst1_s16_x2(__p0, __p1) __extension__ ({ \ - int16x4x2_t __s1 = __p1; \ - int16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 1); \ +#define vshll_high_n_s8(__p0_237, __p1_237) __extension__ ({ \ + int8x16_t __s0_237 = __p0_237; \ + int8x16_t __rev0_237; __rev0_237 = __builtin_shufflevector(__s0_237, __s0_237, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_237; \ + __ret_237 = (int16x8_t)(__noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_237), __p1_237)); \ + __ret_237 = __builtin_shufflevector(__ret_237, __ret_237, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_237; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vst1_p8_x3(__p0, __p1) __extension__ ({ \ - poly8x8x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 4); \ +#define vshll_high_n_s32(__p0_238, __p1_238) __extension__ ({ \ + int32x4_t __s0_238 = __p0_238; \ + int64x2_t __ret_238; \ + __ret_238 = (int64x2_t)(vshll_n_s32(vget_high_s32(__s0_238), __p1_238)); \ + __ret_238; \ }) #else -#define vst1_p8_x3(__p0, __p1) __extension__ ({ \ - poly8x8x3_t __s1 = __p1; \ - poly8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 4); \ +#define vshll_high_n_s32(__p0_239, __p1_239) __extension__ ({ \ + int32x4_t __s0_239 = __p0_239; \ + int32x4_t __rev0_239; __rev0_239 = __builtin_shufflevector(__s0_239, __s0_239, 3, 2, 1, 0); \ + int64x2_t __ret_239; \ + __ret_239 = (int64x2_t)(__noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_239), __p1_239)); \ + __ret_239 = __builtin_shufflevector(__ret_239, __ret_239, 1, 0); \ + __ret_239; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_p64_x3(__p0, __p1) __extension__ ({ \ - poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ +#define vshll_high_n_s16(__p0_240, __p1_240) __extension__ ({ \ + int16x8_t __s0_240 = __p0_240; \ + int32x4_t __ret_240; \ + __ret_240 = (int32x4_t)(vshll_n_s16(vget_high_s16(__s0_240), __p1_240)); \ + __ret_240; \ }) #else -#define vst1_p64_x3(__p0, __p1) __extension__ ({ \ - poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ +#define vshll_high_n_s16(__p0_241, __p1_241) __extension__ ({ \ + int16x8_t __s0_241 = __p0_241; \ + int16x8_t __rev0_241; __rev0_241 = __builtin_shufflevector(__s0_241, __s0_241, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_241; \ + __ret_241 = (int32x4_t)(__noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_241), __p1_241)); \ + __ret_241 = __builtin_shufflevector(__ret_241, __ret_241, 3, 2, 1, 0); \ + __ret_241; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1_p16_x3(__p0, __p1) __extension__ ({ \ - poly16x4x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 5); \ +#define vshrd_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ + __ret; \ }) #else -#define vst1_p16_x3(__p0, __p1) __extension__ ({ \ - poly16x4x3_t __s1 = __p1; \ - poly16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 5); \ +#define vshrd_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_p8_x3(__p0, __p1) __extension__ ({ \ - poly8x16x3_t __s1 = __p1; \ - 
__builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 36); \ +#define vshrd_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ + __ret; \ }) #else -#define vst1q_p8_x3(__p0, __p1) __extension__ ({ \ - poly8x16x3_t __s1 = __p1; \ - poly8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 36); \ +#define vshrd_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_p64_x3(__p0, __p1) __extension__ ({ \ - poly64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 38); \ +#define vshrn_high_n_u32(__p0_242, __p1_242, __p2_242) __extension__ ({ \ + uint16x4_t __s0_242 = __p0_242; \ + uint32x4_t __s1_242 = __p1_242; \ + uint16x8_t __ret_242; \ + __ret_242 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_242), (uint16x4_t)(vshrn_n_u32(__s1_242, __p2_242)))); \ + __ret_242; \ }) #else -#define vst1q_p64_x3(__p0, __p1) __extension__ ({ \ - poly64x2x3_t __s1 = __p1; \ - poly64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 38); \ +#define vshrn_high_n_u32(__p0_243, __p1_243, __p2_243) __extension__ ({ \ + uint16x4_t __s0_243 = __p0_243; \ + uint32x4_t __s1_243 = __p1_243; \ + uint16x4_t __rev0_243; __rev0_243 = __builtin_shufflevector(__s0_243, __s0_243, 3, 2, 1, 0); \ + uint32x4_t __rev1_243; __rev1_243 = __builtin_shufflevector(__s1_243, __s1_243, 3, 2, 1, 0); \ + uint16x8_t __ret_243; \ + __ret_243 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_243), (uint16x4_t)(__noswap_vshrn_n_u32(__rev1_243, __p2_243)))); \ + __ret_243 = __builtin_shufflevector(__ret_243, __ret_243, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_243; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_p16_x3(__p0, __p1) __extension__ ({ \ - poly16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 37); \ +#define vshrn_high_n_u64(__p0_244, __p1_244, __p2_244) __extension__ ({ \ + uint32x2_t __s0_244 = __p0_244; \ + uint64x2_t __s1_244 = __p1_244; \ + uint32x4_t __ret_244; \ + __ret_244 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_244), (uint32x2_t)(vshrn_n_u64(__s1_244, __p2_244)))); \ + __ret_244; \ }) #else -#define vst1q_p16_x3(__p0, __p1) __extension__ ({ \ - poly16x8x3_t __s1 = __p1; \ - poly16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = 
__builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 37); \ +#define vshrn_high_n_u64(__p0_245, __p1_245, __p2_245) __extension__ ({ \ + uint32x2_t __s0_245 = __p0_245; \ + uint64x2_t __s1_245 = __p1_245; \ + uint32x2_t __rev0_245; __rev0_245 = __builtin_shufflevector(__s0_245, __s0_245, 1, 0); \ + uint64x2_t __rev1_245; __rev1_245 = __builtin_shufflevector(__s1_245, __s1_245, 1, 0); \ + uint32x4_t __ret_245; \ + __ret_245 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_245), (uint32x2_t)(__noswap_vshrn_n_u64(__rev1_245, __p2_245)))); \ + __ret_245 = __builtin_shufflevector(__ret_245, __ret_245, 3, 2, 1, 0); \ + __ret_245; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u8_x3(__p0, __p1) __extension__ ({ \ - uint8x16x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 48); \ +#define vshrn_high_n_u16(__p0_246, __p1_246, __p2_246) __extension__ ({ \ + uint8x8_t __s0_246 = __p0_246; \ + uint16x8_t __s1_246 = __p1_246; \ + uint8x16_t __ret_246; \ + __ret_246 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_246), (uint8x8_t)(vshrn_n_u16(__s1_246, __p2_246)))); \ + __ret_246; \ }) -#else -#define vst1q_u8_x3(__p0, __p1) __extension__ ({ \ - uint8x16x3_t __s1 = __p1; \ - uint8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 48); \ +#else +#define vshrn_high_n_u16(__p0_247, __p1_247, __p2_247) __extension__ ({ \ + uint8x8_t __s0_247 = __p0_247; \ + uint16x8_t __s1_247 = __p1_247; \ + uint8x8_t __rev0_247; __rev0_247 = __builtin_shufflevector(__s0_247, __s0_247, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_247; __rev1_247 = __builtin_shufflevector(__s1_247, __s1_247, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_247; \ + __ret_247 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_247), (uint8x8_t)(__noswap_vshrn_n_u16(__rev1_247, __p2_247)))); \ + __ret_247 = __builtin_shufflevector(__ret_247, __ret_247, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_247; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u32_x3(__p0, __p1) __extension__ ({ \ - uint32x4x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 50); \ +#define vshrn_high_n_s32(__p0_248, __p1_248, __p2_248) __extension__ ({ \ + int16x4_t __s0_248 = __p0_248; \ + int32x4_t __s1_248 = __p1_248; \ + int16x8_t __ret_248; \ + __ret_248 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_248), (int16x4_t)(vshrn_n_s32(__s1_248, __p2_248)))); \ + __ret_248; \ }) #else -#define vst1q_u32_x3(__p0, __p1) __extension__ ({ \ - uint32x4x3_t __s1 = __p1; \ - uint32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], 
(int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 50); \ +#define vshrn_high_n_s32(__p0_249, __p1_249, __p2_249) __extension__ ({ \ + int16x4_t __s0_249 = __p0_249; \ + int32x4_t __s1_249 = __p1_249; \ + int16x4_t __rev0_249; __rev0_249 = __builtin_shufflevector(__s0_249, __s0_249, 3, 2, 1, 0); \ + int32x4_t __rev1_249; __rev1_249 = __builtin_shufflevector(__s1_249, __s1_249, 3, 2, 1, 0); \ + int16x8_t __ret_249; \ + __ret_249 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_249), (int16x4_t)(__noswap_vshrn_n_s32(__rev1_249, __p2_249)))); \ + __ret_249 = __builtin_shufflevector(__ret_249, __ret_249, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_249; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u64_x3(__p0, __p1) __extension__ ({ \ - uint64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 51); \ +#define vshrn_high_n_s64(__p0_250, __p1_250, __p2_250) __extension__ ({ \ + int32x2_t __s0_250 = __p0_250; \ + int64x2_t __s1_250 = __p1_250; \ + int32x4_t __ret_250; \ + __ret_250 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_250), (int32x2_t)(vshrn_n_s64(__s1_250, __p2_250)))); \ + __ret_250; \ }) #else -#define vst1q_u64_x3(__p0, __p1) __extension__ ({ \ - uint64x2x3_t __s1 = __p1; \ - uint64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 51); \ +#define vshrn_high_n_s64(__p0_251, __p1_251, __p2_251) __extension__ ({ \ + int32x2_t __s0_251 = __p0_251; \ + int64x2_t __s1_251 = __p1_251; \ + int32x2_t __rev0_251; __rev0_251 = __builtin_shufflevector(__s0_251, __s0_251, 1, 0); \ + int64x2_t __rev1_251; __rev1_251 = __builtin_shufflevector(__s1_251, __s1_251, 1, 0); \ + int32x4_t __ret_251; \ + __ret_251 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_251), (int32x2_t)(__noswap_vshrn_n_s64(__rev1_251, __p2_251)))); \ + __ret_251 = __builtin_shufflevector(__ret_251, __ret_251, 3, 2, 1, 0); \ + __ret_251; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_u16_x3(__p0, __p1) __extension__ ({ \ - uint16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 49); \ +#define vshrn_high_n_s16(__p0_252, __p1_252, __p2_252) __extension__ ({ \ + int8x8_t __s0_252 = __p0_252; \ + int16x8_t __s1_252 = __p1_252; \ + int8x16_t __ret_252; \ + __ret_252 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_252), (int8x8_t)(vshrn_n_s16(__s1_252, __p2_252)))); \ + __ret_252; \ }) #else -#define vst1q_u16_x3(__p0, __p1) __extension__ ({ \ - uint16x8x3_t __s1 = __p1; \ - uint16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 49); \ +#define vshrn_high_n_s16(__p0_253, __p1_253, __p2_253) __extension__ ({ \ + int8x8_t __s0_253 = __p0_253; \ + int16x8_t __s1_253 = __p1_253; \ + int8x8_t __rev0_253; __rev0_253 = __builtin_shufflevector(__s0_253, __s0_253, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_253; 
__rev1_253 = __builtin_shufflevector(__s1_253, __s1_253, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_253; \ + __ret_253 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_253), (int8x8_t)(__noswap_vshrn_n_s16(__rev1_253, __p2_253)))); \ + __ret_253 = __builtin_shufflevector(__ret_253, __ret_253, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_253; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s8_x3(__p0, __p1) __extension__ ({ \ - int8x16x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 32); \ +#define vslid_n_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __s1 = __p1; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vslid_n_u64(__s0, __s1, __p2); \ + __ret; \ }) #else -#define vst1q_s8_x3(__p0, __p1) __extension__ ({ \ - int8x16x3_t __s1 = __p1; \ - int8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 32); \ +#define vslid_n_u64(__p0, __p1, __p2) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __s1 = __p1; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vslid_n_u64(__s0, __s1, __p2); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_f64_x3(__p0, __p1) __extension__ ({ \ - float64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 42); \ +#define vslid_n_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __s1 = __p1; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vslid_n_s64(__s0, __s1, __p2); \ + __ret; \ }) #else -#define vst1q_f64_x3(__p0, __p1) __extension__ ({ \ - float64x2x3_t __s1 = __p1; \ - float64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 42); \ +#define vslid_n_s64(__p0, __p1, __p2) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __s1 = __p1; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vslid_n_s64(__s0, __s1, __p2); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ - float32x4x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 41); \ +#define vsli_n_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ + __ret; \ }) #else -#define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ - float32x4x3_t __s1 = __p1; \ - float32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], 
__rev1.val[2], 41); \ +#define vsli_n_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x1_t __s0 = __p0; \ + poly64x1_t __s1 = __p1; \ + poly64x1_t __ret; \ + __ret = (poly64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ - float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 40); \ +#define vsliq_n_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ + __ret; \ }) #else -#define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ - float16x8x3_t __s1 = __p1; \ - float16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 40); \ +#define vsliq_n_p64(__p0, __p1, __p2) __extension__ ({ \ + poly64x2_t __s0 = __p0; \ + poly64x2_t __s1 = __p1; \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ + poly64x2_t __ret; \ + __ret = (poly64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ - int32x4x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 34); \ -}) +__ai uint8_t vsqaddb_u8(uint8_t __p0, uint8_t __p1) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); + return __ret; +} #else -#define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ - int32x4x3_t __s1 = __p1; \ - int32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 34); \ -}) +__ai uint8_t vsqaddb_u8(uint8_t __p0, uint8_t __p1) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); + return __ret; +} #endif #ifdef __LITTLE_ENDIAN__ -#define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ - int64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 35); \ -}) +__ai uint32_t vsqadds_u32(uint32_t __p0, uint32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); + return __ret; +} #else -#define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ - int64x2x3_t __s1 = __p1; \ - int64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 35); \ -}) +__ai uint32_t vsqadds_u32(uint32_t __p0, uint32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); + return __ret; +} #endif #ifdef 
__LITTLE_ENDIAN__
-#define vst1q_s16_x3(__p0, __p1) __extension__ ({ \
-  int16x8x3_t __s1 = __p1; \
-  __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 33); \
-})
+__ai uint64_t vsqaddd_u64(uint64_t __p0, uint64_t __p1) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1);
+  return __ret;
+}
 #else
-#define vst1q_s16_x3(__p0, __p1) __extension__ ({ \
-  int16x8x3_t __s1 = __p1; \
-  int16x8x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 33); \
-})
+__ai uint64_t vsqaddd_u64(uint64_t __p0, uint64_t __p1) {
+  uint64_t __ret;
+  __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_u8_x3(__p0, __p1) __extension__ ({ \
-  uint8x8x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 16); \
-})
+__ai uint16_t vsqaddh_u16(uint16_t __p0, uint16_t __p1) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1);
+  return __ret;
+}
 #else
-#define vst1_u8_x3(__p0, __p1) __extension__ ({ \
-  uint8x8x3_t __s1 = __p1; \
-  uint8x8x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 16); \
-})
+__ai uint16_t vsqaddh_u16(uint16_t __p0, uint16_t __p1) {
+  uint16_t __ret;
+  __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_u32_x3(__p0, __p1) __extension__ ({ \
-  uint32x2x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 18); \
-})
+__ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+  uint8x16_t __ret;
+  __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
+  return __ret;
+}
 #else
-#define vst1_u32_x3(__p0, __p1) __extension__ ({ \
-  uint32x2x3_t __s1 = __p1; \
-  uint32x2x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 18); \
-})
+__ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
+  uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x16_t __ret;
+  __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
+  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_u64_x3(__p0, __p1) __extension__ ({ \
-  uint64x1x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \
-})
+__ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
+  return __ret;
+}
 #else
-#define vst1_u64_x3(__p0, __p1) __extension__ ({ \
-  uint64x1x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \
-})
+__ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+  uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint32x4_t __ret;
+  __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_u16_x3(__p0, __p1) __extension__ ({ \
-  uint16x4x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 17); \
-})
+__ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) {
+  uint64x2_t __ret;
+  __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51);
+  return __ret;
+}
 #else
-#define vst1_u16_x3(__p0, __p1) __extension__ ({ \
-  uint16x4x3_t __s1 = __p1; \
-  uint16x4x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 17); \
-})
+__ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) {
+  uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  uint64x2_t __ret;
+  __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_s8_x3(__p0, __p1) __extension__ ({ \
-  int8x8x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 0); \
-})
+__ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
+  return __ret;
+}
 #else
-#define vst1_s8_x3(__p0, __p1) __extension__ ({ \
-  int8x8x3_t __s1 = __p1; \
-  int8x8x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 0); \
-})
+__ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
+  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint16x8_t __ret;
+  __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_f64_x3(__p0, __p1) __extension__ ({ \
-  float64x1x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 10); \
-})
+__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
+  uint8x8_t __ret;
+  __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
+  return __ret;
+}
 #else
-#define vst1_f64_x3(__p0, __p1) __extension__ ({ \
-  float64x1x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 10); \
-})
+__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
+  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  uint8x8_t __ret;
+  __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_f32_x3(__p0, __p1) __extension__ ({ \
-  float32x2x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 9); \
-})
+__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
+  uint32x2_t __ret;
+  __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
+  return __ret;
+}
 #else
-#define vst1_f32_x3(__p0, __p1) __extension__ ({ \
-  float32x2x3_t __s1 = __p1; \
-  float32x2x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 9); \
-})
+__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
+  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  uint32x2_t __ret;
+  __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_f16_x3(__p0, __p1) __extension__ ({ \
-  float16x4x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 8); \
-})
+__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, uint64x1_t __p1) {
+  uint64x1_t __ret;
+  __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
+  return __ret;
+}
 #else
-#define vst1_f16_x3(__p0, __p1) __extension__ ({ \
-  float16x4x3_t __s1 = __p1; \
-  float16x4x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 8); \
-})
+__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, uint64x1_t __p1) {
+  uint64x1_t __ret;
+  __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_s32_x3(__p0, __p1) __extension__ ({ \
-  int32x2x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 2); \
-})
+__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
+  return __ret;
+}
 #else
-#define vst1_s32_x3(__p0, __p1) __extension__ ({ \
-  int32x2x3_t __s1 = __p1; \
-  int32x2x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 2); \
-})
+__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
+  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  uint16x4_t __ret;
+  __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif
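> Annotation (not part of the upstream diff): the regenerated arm_neon.h emits every intrinsic twice. The `__LITTLE_ENDIAN__` branch calls the builtin directly; the big-endian branch first reverses the lane order of each vector argument (the `__rev0`/`__rev1` temporaries) and then of the result, because clang numbers vector lanes in memory order while the NEON builtins work in register order. A minimal sketch of the idiom, assuming clang's `__builtin_shufflevector` extension; `v4i` is a demo type, not from the header:

```c
/* Demo only: the lane-reversal idiom used by the big-endian branches above. */
typedef int v4i __attribute__((vector_size(16)));

static v4i reverse_lanes(v4i v) {
  /* Same pattern as the __rev0/__rev1 temporaries: select lanes 3,2,1,0. */
  return __builtin_shufflevector(v, v, 3, 2, 1, 0);
}
```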
 #ifdef __LITTLE_ENDIAN__
-#define vst1_s64_x3(__p0, __p1) __extension__ ({ \
-  int64x1x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 3); \
-})
+__ai float64x2_t vsqrtq_f64(float64x2_t __p0) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 42);
+  return __ret;
+}
 #else
-#define vst1_s64_x3(__p0, __p1) __extension__ ({ \
-  int64x1x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 3); \
-})
+__ai float64x2_t vsqrtq_f64(float64x2_t __p0) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_s16_x3(__p0, __p1) __extension__ ({ \
-  int16x4x3_t __s1 = __p1; \
-  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 1); \
-})
+__ai float32x4_t vsqrtq_f32(float32x4_t __p0) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 41);
+  return __ret;
+}
 #else
-#define vst1_s16_x3(__p0, __p1) __extension__ ({ \
-  int16x4x3_t __s1 = __p1; \
-  int16x4x3_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 1); \
-})
+__ai float32x4_t vsqrtq_f32(float32x4_t __p0) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 41);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_p8_x4(__p0, __p1) __extension__ ({ \
-  poly8x8x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 4); \
-})
-#else
-#define vst1_p8_x4(__p0, __p1) __extension__ ({ \
-  poly8x8x4_t __s1 = __p1; \
-  poly8x8x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 4); \
-})
+__ai float64x1_t vsqrt_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 10);
+  return __ret;
+}
+#else
+__ai float64x1_t vsqrt_f64(float64x1_t __p0) {
+  float64x1_t __ret;
+  __ret = (float64x1_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 10);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_p64_x4(__p0, __p1) __extension__ ({ \
-  poly64x1x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \
-})
+__ai float32x2_t vsqrt_f32(float32x2_t __p0) {
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 9);
+  return __ret;
+}
 #else
-#define vst1_p64_x4(__p0, __p1) __extension__ ({ \
-  poly64x1x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \
-})
+__ai float32x2_t vsqrt_f32(float32x2_t __p0) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vsqrt_v((int8x8_t)__rev0, 9);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_p16_x4(__p0, __p1) __extension__ ({ \
-  poly16x4x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 5); \
+#define vsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \
+  uint64_t __s0 = __p0; \
+  uint64_t __s1 = __p1; \
+  uint64_t __ret; \
+  __ret = (uint64_t) __builtin_neon_vsrad_n_u64(__s0, __s1, __p2); \
+  __ret; \
 })
 #else
-#define vst1_p16_x4(__p0, __p1) __extension__ ({ \
-  poly16x4x4_t __s1 = __p1; \
-  poly16x4x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 5); \
+#define vsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \
+  uint64_t __s0 = __p0; \
+  uint64_t __s1 = __p1; \
+  uint64_t __ret; \
+  __ret = (uint64_t) __builtin_neon_vsrad_n_u64(__s0, __s1, __p2); \
+  __ret; \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_p8_x4(__p0, __p1) __extension__ ({ \
-  poly8x16x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 36); \
+#define vsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int64_t __s1 = __p1; \
+  int64_t __ret; \
+  __ret = (int64_t) __builtin_neon_vsrad_n_s64(__s0, __s1, __p2); \
+  __ret; \
 })
 #else
-#define vst1q_p8_x4(__p0, __p1) __extension__ ({ \
-  poly8x16x4_t __s1 = __p1; \
-  poly8x16x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 36); \
+#define vsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int64_t __s1 = __p1; \
+  int64_t __ret; \
+  __ret = (int64_t) __builtin_neon_vsrad_n_s64(__s0, __s1, __p2); \
+  __ret; \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_p64_x4(__p0, __p1) __extension__ ({ \
-  poly64x2x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 38); \
+#define vsrid_n_u64(__p0, __p1, __p2) __extension__ ({ \
+  uint64_t __s0 = __p0; \
+  uint64_t __s1 = __p1; \
+  uint64_t __ret; \
+  __ret = (uint64_t) __builtin_neon_vsrid_n_u64(__s0, __s1, __p2); \
+  __ret; \
 })
 #else
-#define vst1q_p64_x4(__p0, __p1) __extension__ ({ \
-  poly64x2x4_t __s1 = __p1; \
-  poly64x2x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 38); \
+#define vsrid_n_u64(__p0, __p1, __p2) __extension__ ({ \
+  uint64_t __s0 = __p0; \
+  uint64_t __s1 = __p1; \
+  uint64_t __ret; \
+  __ret = (uint64_t) __builtin_neon_vsrid_n_u64(__s0, __s1, __p2); \
+  __ret; \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_p16_x4(__p0, __p1) __extension__ ({ \
-  poly16x8x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 37); \
+#define vsrid_n_s64(__p0, __p1, __p2) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int64_t __s1 = __p1; \
+  int64_t __ret; \
+  __ret = (int64_t) __builtin_neon_vsrid_n_s64(__s0, __s1, __p2); \
+  __ret; \
 })
 #else
-#define vst1q_p16_x4(__p0, __p1) __extension__ ({ \
-  poly16x8x4_t __s1 = __p1; \
-  poly16x8x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 37); \
+#define vsrid_n_s64(__p0, __p1, __p2) __extension__ ({ \
+  int64_t __s0 = __p0; \
+  int64_t __s1 = __p1; \
+  int64_t __ret; \
+  __ret = (int64_t) __builtin_neon_vsrid_n_s64(__s0, __s1, __p2); \
+  __ret; \
 })
 #endif
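> Annotation (not part of the upstream diff): the scalar `vsrad_n_*` / `vsrid_n_*` wrappers above, and the `vsri_n_*` macros that follow, map to the AArch64 shift-right-and-accumulate (SRA) and shift-right-and-insert (SRI) instructions. If I read the usual semantics correctly, `vsrad_n_s64(a, b, n)` computes `a + (b >> n)` with an arithmetic shift, while `vsrid_n_u64(a, b, n)` shifts `b` right by `n` and inserts the result into `a`, keeping `a`'s top `n` bits. A hedged sketch (AArch64 only):

```c
#include <arm_neon.h>
#include <assert.h>

int main(void) {
  /* SSRA: accumulate an arithmetically shifted value: 10 + (-8 >> 2) = 8. */
  assert(vsrad_n_s64(10, -8, 2) == 8);
  /* SRI: keep the top 8 bits of the first operand, insert the shifted second. */
  assert(vsrid_n_u64(0xF000000000000000ULL, 0xFF00000000000000ULL, 8) ==
         0xF0FF000000000000ULL);
  return 0;
}
```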
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_u8_x4(__p0, __p1) __extension__ ({ \
-  uint8x16x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 48); \
+#define vsri_n_p64(__p0, __p1, __p2) __extension__ ({ \
+  poly64x1_t __s0 = __p0; \
+  poly64x1_t __s1 = __p1; \
+  poly64x1_t __ret; \
+  __ret = (poly64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \
+  __ret; \
 })
 #else
-#define vst1q_u8_x4(__p0, __p1) __extension__ ({ \
-  uint8x16x4_t __s1 = __p1; \
-  uint8x16x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 48); \
+#define vsri_n_p64(__p0, __p1, __p2) __extension__ ({ \
+  poly64x1_t __s0 = __p0; \
+  poly64x1_t __s1 = __p1; \
+  poly64x1_t __ret; \
+  __ret = (poly64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \
+  __ret; \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_u32_x4(__p0, __p1) __extension__ ({ \
-  uint32x4x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 50); \
+#define vsriq_n_p64(__p0, __p1, __p2) __extension__ ({ \
+  poly64x2_t __s0 = __p0; \
+  poly64x2_t __s1 = __p1; \
+  poly64x2_t __ret; \
+  __ret = (poly64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \
+  __ret; \
 })
 #else
-#define vst1q_u32_x4(__p0, __p1) __extension__ ({ \
-  uint32x4x4_t __s1 = __p1; \
-  uint32x4x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 50); \
+#define vsriq_n_p64(__p0, __p1, __p2) __extension__ ({ \
+  poly64x2_t __s0 = __p0; \
+  poly64x2_t __s1 = __p1; \
+  poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+  poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  poly64x2_t __ret; \
+  __ret = (poly64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+  __ret; \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_u64_x4(__p0, __p1) __extension__ ({ \
-  uint64x2x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 51); \
+#define vst1_p64(__p0, __p1) __extension__ ({ \
+  poly64x1_t __s1 = __p1; \
+  __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 6); \
 })
 #else
-#define vst1q_u64_x4(__p0, __p1) __extension__ ({ \
-  uint64x2x4_t __s1 = __p1; \
-  uint64x2x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 51); \
+#define vst1_p64(__p0, __p1) __extension__ ({ \
+  poly64x1_t __s1 = __p1; \
+  __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 6); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_u16_x4(__p0, __p1) __extension__ ({ \
-  uint16x8x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 49); \
+#define vst1q_p64(__p0, __p1) __extension__ ({ \
+  poly64x2_t __s1 = __p1; \
+  __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 38); \
 })
 #else
-#define vst1q_u16_x4(__p0, __p1) __extension__ ({ \
-  uint16x8x4_t __s1 = __p1; \
-  uint16x8x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 49); \
+#define vst1q_p64(__p0, __p1) __extension__ ({ \
+  poly64x2_t __s1 = __p1; \
+  poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 38); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_s8_x4(__p0, __p1) __extension__ ({ \
-  int8x16x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 32); \
+#define vst1q_f64(__p0, __p1) __extension__ ({ \
+  float64x2_t __s1 = __p1; \
+  __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 42); \
 })
 #else
-#define vst1q_s8_x4(__p0, __p1) __extension__ ({ \
-  int8x16x4_t __s1 = __p1; \
-  int8x16x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 32); \
+#define vst1q_f64(__p0, __p1) __extension__ ({ \
+  float64x2_t __s1 = __p1; \
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 42); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_f64_x4(__p0, __p1) __extension__ ({ \
-  float64x2x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 42); \
+#define vst1_f64(__p0, __p1) __extension__ ({ \
+  float64x1_t __s1 = __p1; \
+  __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 10); \
 })
 #else
-#define vst1q_f64_x4(__p0, __p1) __extension__ ({ \
-  float64x2x4_t __s1 = __p1; \
-  float64x2x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 42); \
+#define vst1_f64(__p0, __p1) __extension__ ({ \
+  float64x1_t __s1 = __p1; \
+  __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 10); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_f32_x4(__p0, __p1) __extension__ ({ \
-  float32x4x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 41); \
+#define vst1_lane_p64(__p0, __p1, __p2) __extension__ ({ \
+  poly64x1_t __s1 = __p1; \
+  __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \
 })
 #else
-#define vst1q_f32_x4(__p0, __p1) __extension__ ({ \
-  float32x4x4_t __s1 = __p1; \
-  float32x4x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 41); \
+#define vst1_lane_p64(__p0, __p1, __p2) __extension__ ({ \
+  poly64x1_t __s1 = __p1; \
+  __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_f16_x4(__p0, __p1) __extension__ ({ \
-  float16x8x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 40); \
+#define vst1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \
+  poly64x2_t __s1 = __p1; \
+  __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 38); \
 })
 #else
-#define vst1q_f16_x4(__p0, __p1) __extension__ ({ \
-  float16x8x4_t __s1 = __p1; \
-  float16x8x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 40); \
+#define vst1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \
+  poly64x2_t __s1 = __p1; \
+  poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 38); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_s32_x4(__p0, __p1) __extension__ ({ \
-  int32x4x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 34); \
+#define vst1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \
+  float64x2_t __s1 = __p1; \
+  __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 42); \
 })
 #else
-#define vst1q_s32_x4(__p0, __p1) __extension__ ({ \
-  int32x4x4_t __s1 = __p1; \
-  int32x4x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 34); \
+#define vst1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \
+  float64x2_t __s1 = __p1; \
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+  __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 42); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_s64_x4(__p0, __p1) __extension__ ({ \
-  int64x2x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 35); \
+#define vst1_lane_f64(__p0, __p1, __p2) __extension__ ({ \
+  float64x1_t __s1 = __p1; \
+  __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \
 })
 #else
-#define vst1q_s64_x4(__p0, __p1) __extension__ ({ \
-  int64x2x4_t __s1 = __p1; \
-  int64x2x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 35); \
+#define vst1_lane_f64(__p0, __p1, __p2) __extension__ ({ \
+  float64x1_t __s1 = __p1; \
+  __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1q_s16_x4(__p0, __p1) __extension__ ({ \
-  int16x8x4_t __s1 = __p1; \
-  __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 33); \
+#define vst1_p64_x2(__p0, __p1) __extension__ ({ \
+  poly64x1x2_t __s1 = __p1; \
+  __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \
 })
 #else
-#define vst1q_s16_x4(__p0, __p1) __extension__ ({ \
-  int16x8x4_t __s1 = __p1; \
-  int16x8x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 33); \
+#define vst1_p64_x2(__p0, __p1) __extension__ ({ \
+  poly64x1x2_t __s1 = __p1; \
+  __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_u8_x4(__p0, __p1) __extension__ ({ \
-  uint8x8x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 16); \
+#define vst1q_p64_x2(__p0, __p1) __extension__ ({ \
+  poly64x2x2_t __s1 = __p1; \
+  __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 38); \
 })
 #else
-#define vst1_u8_x4(__p0, __p1) __extension__ ({ \
-  uint8x8x4_t __s1 = __p1; \
-  uint8x8x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 16); \
+#define vst1q_p64_x2(__p0, __p1) __extension__ ({ \
+  poly64x2x2_t __s1 = __p1; \
+  poly64x2x2_t __rev1; \
+  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
+  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
+  __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 38); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_u32_x4(__p0, __p1) __extension__ ({ \
-  uint32x2x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 18); \
+#define vst1q_f64_x2(__p0, __p1) __extension__ ({ \
+  float64x2x2_t __s1 = __p1; \
+  __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 42); \
 })
 #else
-#define vst1_u32_x4(__p0, __p1) __extension__ ({ \
-  uint32x2x4_t __s1 = __p1; \
-  uint32x2x4_t __rev1; \
+#define vst1q_f64_x2(__p0, __p1) __extension__ ({ \
+  float64x2x2_t __s1 = __p1; \
+  float64x2x2_t __rev1; \
   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 18); \
+  __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 42); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_u64_x4(__p0, __p1) __extension__ ({ \
-  uint64x1x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \
+#define vst1_f64_x2(__p0, __p1) __extension__ ({ \
+  float64x1x2_t __s1 = __p1; \
+  __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 10); \
 })
 #else
-#define vst1_u64_x4(__p0, __p1) __extension__ ({ \
-  uint64x1x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \
+#define vst1_f64_x2(__p0, __p1) __extension__ ({ \
+  float64x1x2_t __s1 = __p1; \
+  __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 10); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_u16_x4(__p0, __p1) __extension__ ({ \
-  uint16x4x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 17); \
+#define vst1_p64_x3(__p0, __p1) __extension__ ({ \
+  poly64x1x3_t __s1 = __p1; \
+  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \
 })
 #else
-#define vst1_u16_x4(__p0, __p1) __extension__ ({ \
-  uint16x4x4_t __s1 = __p1; \
-  uint16x4x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 17); \
+#define vst1_p64_x3(__p0, __p1) __extension__ ({ \
+  poly64x1x3_t __s1 = __p1; \
+  __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_s8_x4(__p0, __p1) __extension__ ({ \
-  int8x8x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 0); \
+#define vst1q_p64_x3(__p0, __p1) __extension__ ({ \
+  poly64x2x3_t __s1 = __p1; \
+  __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 38); \
 })
 #else
-#define vst1_s8_x4(__p0, __p1) __extension__ ({ \
-  int8x8x4_t __s1 = __p1; \
-  int8x8x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 0); \
+#define vst1q_p64_x3(__p0, __p1) __extension__ ({ \
+  poly64x2x3_t __s1 = __p1; \
+  poly64x2x3_t __rev1; \
+  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
+  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
+  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
+  __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 38); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_f64_x4(__p0, __p1) __extension__ ({ \
-  float64x1x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 10); \
+#define vst1q_f64_x3(__p0, __p1) __extension__ ({ \
+  float64x2x3_t __s1 = __p1; \
+  __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 42); \
 })
 #else
-#define vst1_f64_x4(__p0, __p1) __extension__ ({ \
-  float64x1x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 10); \
+#define vst1q_f64_x3(__p0, __p1) __extension__ ({ \
+  float64x2x3_t __s1 = __p1; \
+  float64x2x3_t __rev1; \
+  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
+  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
+  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
+  __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 42); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_f32_x4(__p0, __p1) __extension__ ({ \
-  float32x2x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 9); \
+#define vst1_f64_x3(__p0, __p1) __extension__ ({ \
+  float64x1x3_t __s1 = __p1; \
+  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 10); \
 })
 #else
-#define vst1_f32_x4(__p0, __p1) __extension__ ({ \
-  float32x2x4_t __s1 = __p1; \
-  float32x2x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 9); \
+#define vst1_f64_x3(__p0, __p1) __extension__ ({ \
+  float64x1x3_t __s1 = __p1; \
+  __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 10); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_f16_x4(__p0, __p1) __extension__ ({ \
-  float16x4x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 8); \
+#define vst1_p64_x4(__p0, __p1) __extension__ ({ \
+  poly64x1x4_t __s1 = __p1; \
+  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \
 })
 #else
-#define vst1_f16_x4(__p0, __p1) __extension__ ({ \
-  float16x4x4_t __s1 = __p1; \
-  float16x4x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 8); \
+#define vst1_p64_x4(__p0, __p1) __extension__ ({ \
+  poly64x1x4_t __s1 = __p1; \
+  __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_s32_x4(__p0, __p1) __extension__ ({ \
-  int32x2x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 2); \
+#define vst1q_p64_x4(__p0, __p1) __extension__ ({ \
+  poly64x2x4_t __s1 = __p1; \
+  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 38); \
 })
 #else
-#define vst1_s32_x4(__p0, __p1) __extension__ ({ \
-  int32x2x4_t __s1 = __p1; \
-  int32x2x4_t __rev1; \
+#define vst1q_p64_x4(__p0, __p1) __extension__ ({ \
+  poly64x2x4_t __s1 = __p1; \
+  poly64x2x4_t __rev1; \
   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
   __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
   __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 2); \
+  __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 38); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_s64_x4(__p0, __p1) __extension__ ({ \
-  int64x1x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 3); \
+#define vst1q_f64_x4(__p0, __p1) __extension__ ({ \
+  float64x2x4_t __s1 = __p1; \
+  __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 42); \
 })
 #else
-#define vst1_s64_x4(__p0, __p1) __extension__ ({ \
-  int64x1x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 3); \
+#define vst1q_f64_x4(__p0, __p1) __extension__ ({ \
+  float64x2x4_t __s1 = __p1; \
+  float64x2x4_t __rev1; \
+  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
+  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
+  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \
+  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \
+  __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 42); \
 })
 #endif
 #ifdef __LITTLE_ENDIAN__
-#define vst1_s16_x4(__p0, __p1) __extension__ ({ \
-  int16x4x4_t __s1 = __p1; \
-  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 1); \
+#define vst1_f64_x4(__p0, __p1) __extension__ ({ \
+  float64x1x4_t __s1 = __p1; \
+  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 10); \
 })
 #else
-#define vst1_s16_x4(__p0, __p1) __extension__ ({ \
-  int16x4x4_t __s1 = __p1; \
-  int16x4x4_t __rev1; \
-  __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
-  __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
-  __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
-  __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \
-  __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 1); \
+#define vst1_f64_x4(__p0, __p1) __extension__ ({ \
+  float64x1x4_t __s1 = __p1; \
+  __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 10); \
 })
 #endif
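> Annotation (not part of the upstream diff): the block above finishes swapping the old `vst1*_x2`/`_x3`/`_x4` multi-vector store macros for clang 8's regenerated definitions and fills in the `float64`/`poly64` store family (`vst1q_f64`, `vst1_lane_f64`, `vst1q_f64_x2`, and so on). Typical usage, assuming an AArch64 target:

```c
#include <arm_neon.h>

/* Demo only: store both lanes of a float64x2_t, then just lane 0. */
static void store_f64(double *dst, float64x2_t v) {
  vst1q_f64(dst, v);
  vst1q_lane_f64(dst + 2, v, 0);
}
```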
diff --git a/lib/clang/7.0.0/include/armintr.h b/lib/clang/8.0.0/include/armintr.h
similarity index 100%
rename from lib/clang/7.0.0/include/armintr.h
rename to lib/clang/8.0.0/include/armintr.h
diff --git a/lib/clang/7.0.0/include/avx2intrin.h b/lib/clang/8.0.0/include/avx2intrin.h
similarity index 58%
rename from lib/clang/7.0.0/include/avx2intrin.h
rename to lib/clang/8.0.0/include/avx2intrin.h
index caf4ced..9688a96 100644
--- a/lib/clang/7.0.0/include/avx2intrin.h
+++ b/lib/clang/8.0.0/include/avx2intrin.h
@@ -29,120 +29,121 @@
 #define __AVX2INTRIN_H
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2")))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128)))
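> Annotation (not part of the upstream diff): clang 8 splits the single `__DEFAULT_FN_ATTRS` macro into 256- and 128-bit variants carrying `__min_vector_width__(N)`. My understanding of the clang 8 behaviour is that the attribute tells the backend the intrinsic genuinely needs N-bit vectors, so it is not narrowed under options such as `-mprefer-vector-width=128`. The attribute can also be applied to user code; a sketch:

```c
#include <immintrin.h>

/* Demo only: mark a helper as requiring full 256-bit vectors. */
__attribute__((__target__("avx2"), __min_vector_width__(256)))
static __m256i add_dwords(__m256i a, __m256i b) {
  return _mm256_add_epi32(a, b);
}
```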
 /* SSE4 Multiple Packed Sums of Absolute Difference. */
 #define _mm256_mpsadbw_epu8(X, Y, M) \
   (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
                                      (__v32qi)(__m256i)(Y), (int)(M))
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi8(__m256i __a)
 {
   return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi16(__m256i __a)
 {
   return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi32(__m256i __a)
 {
   return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_packs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_packs_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_packus_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_packus_epi32(__m256i __V1, __m256i __V2)
 {
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a + (__v8su)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a + (__v4du)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
 }
-#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
+#define _mm256_alignr_epi8(a, b, n) \
   (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
-                                     (__v32qi)(__m256i)(b), (n)); })
+                                     (__v32qi)(__m256i)(b), (n))
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_and_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a & (__v4du)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_andnot_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)(~(__v4du)__a & (__v4du)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_avg_epu8(__m256i __a, __m256i __b)
 {
   typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
@@ -152,7 +153,7 @@ _mm256_avg_epu8(__m256i __a, __m256i __b)
                    >> 1, __v32qu);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_avg_epu16(__m256i __a, __m256i __b)
 {
   typedef unsigned int __v16su __attribute__((__vector_size__(64)));
@@ -162,58 +163,42 @@ _mm256_avg_epu16(__m256i __a, __m256i __b)
                    >> 1, __v16hu);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
 {
   return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, (__v32qi)__M);
 }
-#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
-  (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(V1), \
-                                   (__v16hi)(__m256i)(V2), \
-                                   (((M) & 0x01) ? 16 : 0), \
-                                   (((M) & 0x02) ? 17 : 1), \
-                                   (((M) & 0x04) ? 18 : 2), \
-                                   (((M) & 0x08) ? 19 : 3), \
-                                   (((M) & 0x10) ? 20 : 4), \
-                                   (((M) & 0x20) ? 21 : 5), \
-                                   (((M) & 0x40) ? 22 : 6), \
-                                   (((M) & 0x80) ? 23 : 7), \
-                                   (((M) & 0x01) ? 24 : 8), \
-                                   (((M) & 0x02) ? 25 : 9), \
-                                   (((M) & 0x04) ? 26 : 10), \
-                                   (((M) & 0x08) ? 27 : 11), \
-                                   (((M) & 0x10) ? 28 : 12), \
-                                   (((M) & 0x20) ? 29 : 13), \
-                                   (((M) & 0x40) ? 30 : 14), \
-                                   (((M) & 0x80) ? 31 : 15)); })
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+#define _mm256_blend_epi16(V1, V2, M) \
+  (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
+                                     (__v16hi)(__m256i)(V2), (int)(M))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qi)__a == (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a == (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a == (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4di)__a == (__v4di)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
 {
   /* This function always performs a signed comparison, but __v32qi is a char
@@ -221,151 +206,151 @@ _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
   return (__m256i)((__v32qs)__a > (__v32qs)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a > (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a > (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4di)__a > (__v4di)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hadd_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hadd_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hadds_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hsub_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hsub_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hsubs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maddubs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_madd_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS256
 _mm256_movemask_epi8(__m256i __a)
 {
   return __builtin_ia32_pmovmskb256((__v32qi)__a);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi8_epi16(__m128i __V)
 {
   /* This function always performs a signed extension, but __v16qi is a char
@@ -373,7 +358,7 @@ _mm256_cvtepi8_epi16(__m128i __V)
   return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi8_epi32(__m128i __V)
 {
   /* This function always performs a signed extension, but __v16qi is a char
@@ -381,7 +366,7 @@ _mm256_cvtepi8_epi32(__m128i __V)
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi8_epi64(__m128i __V)
 {
   /* This function always performs a signed extension, but __v16qi is a char
@@ -389,920 +374,795 @@ _mm256_cvtepi8_epi64(__m128i __V)
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi16_epi32(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi16_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi32_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu8_epi16(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu8_epi32(__m128i __V)
 {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu8_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu16_epi32(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu16_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu32_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mul_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mulhi_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mulhi_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mullo_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a * (__v16hu)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mullo_epi32 (__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a * (__v8su)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mul_epu32(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_or_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a | (__v4du)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sad_epu8(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
 }
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shuffle_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
 }
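> Annotation (not part of the upstream diff): in the hunk that follows, `_mm256_shuffle_epi32`, `_mm256_shufflehi_epi16`, and `_mm256_shufflelo_epi16` (like `_mm256_blend_epi16` earlier) stop expanding to large `__builtin_shufflevector` forms and map to dedicated builtins (`__builtin_ia32_pshufd256` and friends), which keeps the immediate checked and the expansion small. Caller-visible behaviour should be unchanged; typical usage:

```c
#include <immintrin.h>

/* Demo only: rotate each 128-bit lane's dwords left by one position. */
__attribute__((__target__("avx2")))
static __m256i rotate_dwords(__m256i v) {
  return _mm256_shuffle_epi32(v, _MM_SHUFFLE(0, 3, 2, 1));
}
```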
+ (((imm) >> 2) & 0x3), \ - 8 + (((imm) >> 4) & 0x3), \ - 8 + (((imm) >> 6) & 0x3), \ - 12, 13, 14, 15); }) - -static __inline__ __m256i __DEFAULT_FN_ATTRS +#define _mm256_shuffle_epi32(a, imm) \ + (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm)) + +#define _mm256_shufflehi_epi16(a, imm) \ + (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm)) + +#define _mm256_shufflelo_epi16(a, imm) \ + (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); } -#define _mm256_slli_si256(a, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector( \ - (__v32qi)_mm256_setzero_si256(), \ - (__v32qi)(__m256i)(a), \ - ((char)(imm)&0xF0) ? 0 : ((char)(imm)>0x0 ? 16 : 32) - (char)(imm), \ - ((char)(imm)&0xF0) ? 1 : ((char)(imm)>0x1 ? 17 : 33) - (char)(imm), \ - ((char)(imm)&0xF0) ? 2 : ((char)(imm)>0x2 ? 18 : 34) - (char)(imm), \ - ((char)(imm)&0xF0) ? 3 : ((char)(imm)>0x3 ? 19 : 35) - (char)(imm), \ - ((char)(imm)&0xF0) ? 4 : ((char)(imm)>0x4 ? 20 : 36) - (char)(imm), \ - ((char)(imm)&0xF0) ? 5 : ((char)(imm)>0x5 ? 21 : 37) - (char)(imm), \ - ((char)(imm)&0xF0) ? 6 : ((char)(imm)>0x6 ? 22 : 38) - (char)(imm), \ - ((char)(imm)&0xF0) ? 7 : ((char)(imm)>0x7 ? 23 : 39) - (char)(imm), \ - ((char)(imm)&0xF0) ? 8 : ((char)(imm)>0x8 ? 24 : 40) - (char)(imm), \ - ((char)(imm)&0xF0) ? 9 : ((char)(imm)>0x9 ? 25 : 41) - (char)(imm), \ - ((char)(imm)&0xF0) ? 10 : ((char)(imm)>0xA ? 26 : 42) - (char)(imm), \ - ((char)(imm)&0xF0) ? 11 : ((char)(imm)>0xB ? 27 : 43) - (char)(imm), \ - ((char)(imm)&0xF0) ? 12 : ((char)(imm)>0xC ? 28 : 44) - (char)(imm), \ - ((char)(imm)&0xF0) ? 13 : ((char)(imm)>0xD ? 29 : 45) - (char)(imm), \ - ((char)(imm)&0xF0) ? 14 : ((char)(imm)>0xE ? 30 : 46) - (char)(imm), \ - ((char)(imm)&0xF0) ? 15 : ((char)(imm)>0xF ? 31 : 47) - (char)(imm), \ - ((char)(imm)&0xF0) ? 16 : ((char)(imm)>0x0 ? 32 : 48) - (char)(imm), \ - ((char)(imm)&0xF0) ? 17 : ((char)(imm)>0x1 ? 33 : 49) - (char)(imm), \ - ((char)(imm)&0xF0) ? 18 : ((char)(imm)>0x2 ? 34 : 50) - (char)(imm), \ - ((char)(imm)&0xF0) ? 19 : ((char)(imm)>0x3 ? 35 : 51) - (char)(imm), \ - ((char)(imm)&0xF0) ? 20 : ((char)(imm)>0x4 ? 36 : 52) - (char)(imm), \ - ((char)(imm)&0xF0) ? 21 : ((char)(imm)>0x5 ? 37 : 53) - (char)(imm), \ - ((char)(imm)&0xF0) ? 22 : ((char)(imm)>0x6 ? 38 : 54) - (char)(imm), \ - ((char)(imm)&0xF0) ? 23 : ((char)(imm)>0x7 ? 39 : 55) - (char)(imm), \ - ((char)(imm)&0xF0) ? 24 : ((char)(imm)>0x8 ? 40 : 56) - (char)(imm), \ - ((char)(imm)&0xF0) ? 25 : ((char)(imm)>0x9 ? 41 : 57) - (char)(imm), \ - ((char)(imm)&0xF0) ? 26 : ((char)(imm)>0xA ? 42 : 58) - (char)(imm), \ - ((char)(imm)&0xF0) ? 27 : ((char)(imm)>0xB ? 43 : 59) - (char)(imm), \ - ((char)(imm)&0xF0) ? 28 : ((char)(imm)>0xC ? 44 : 60) - (char)(imm), \ - ((char)(imm)&0xF0) ? 29 : ((char)(imm)>0xD ? 45 : 61) - (char)(imm), \ - ((char)(imm)&0xF0) ? 30 : ((char)(imm)>0xE ? 46 : 62) - (char)(imm), \ - ((char)(imm)&0xF0) ? 31 : ((char)(imm)>0xF ? 
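The `_mm256_shuffle_epi32` / `_mm256_shufflehi_epi16` / `_mm256_shufflelo_epi16` macros above are now defined through the dedicated `__builtin_ia32_pshufd256` / `pshufhw256` / `pshuflw256` builtins instead of GNU statement-expression shuffles. A minimal usage sketch (illustrative demo code, not part of the diff; assumes `-mavx2`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i v = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
  // imm8 0x1B = (0,1,2,3) field order: reverses the dwords
  // independently within each 128-bit lane.
  __m256i r = _mm256_shuffle_epi32(v, 0x1B);

  int out[8];
  _mm256_storeu_si256((__m256i *)out, r);
  for (int i = 0; i < 8; ++i) printf("%d ", out[i]);  // 3 2 1 0 7 6 5 4
  putchar('\n');
  return 0;
}
```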
47 : 63) - (char)(imm)); }) - -#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) - -static __inline__ __m256i __DEFAULT_FN_ATTRS +#define _mm256_slli_si256(a, imm) \ + (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)) + +#define _mm256_bslli_epi128(a, imm) \ + (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psllq256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); } -#define _mm256_srli_si256(a, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector( \ - (__v32qi)(__m256i)(a), \ - (__v32qi)_mm256_setzero_si256(), \ - ((char)(imm)&0xF0) ? 32 : (char)(imm) + ((char)(imm)>0xF ? 16 : 0), \ - ((char)(imm)&0xF0) ? 33 : (char)(imm) + ((char)(imm)>0xE ? 17 : 1), \ - ((char)(imm)&0xF0) ? 34 : (char)(imm) + ((char)(imm)>0xD ? 18 : 2), \ - ((char)(imm)&0xF0) ? 35 : (char)(imm) + ((char)(imm)>0xC ? 19 : 3), \ - ((char)(imm)&0xF0) ? 36 : (char)(imm) + ((char)(imm)>0xB ? 20 : 4), \ - ((char)(imm)&0xF0) ? 37 : (char)(imm) + ((char)(imm)>0xA ? 21 : 5), \ - ((char)(imm)&0xF0) ? 38 : (char)(imm) + ((char)(imm)>0x9 ? 22 : 6), \ - ((char)(imm)&0xF0) ? 39 : (char)(imm) + ((char)(imm)>0x8 ? 23 : 7), \ - ((char)(imm)&0xF0) ? 40 : (char)(imm) + ((char)(imm)>0x7 ? 24 : 8), \ - ((char)(imm)&0xF0) ? 41 : (char)(imm) + ((char)(imm)>0x6 ? 25 : 9), \ - ((char)(imm)&0xF0) ? 42 : (char)(imm) + ((char)(imm)>0x5 ? 26 : 10), \ - ((char)(imm)&0xF0) ? 43 : (char)(imm) + ((char)(imm)>0x4 ? 27 : 11), \ - ((char)(imm)&0xF0) ? 44 : (char)(imm) + ((char)(imm)>0x3 ? 
28 : 12), \ - ((char)(imm)&0xF0) ? 45 : (char)(imm) + ((char)(imm)>0x2 ? 29 : 13), \ - ((char)(imm)&0xF0) ? 46 : (char)(imm) + ((char)(imm)>0x1 ? 30 : 14), \ - ((char)(imm)&0xF0) ? 47 : (char)(imm) + ((char)(imm)>0x0 ? 31 : 15), \ - ((char)(imm)&0xF0) ? 48 : (char)(imm) + ((char)(imm)>0xF ? 32 : 16), \ - ((char)(imm)&0xF0) ? 49 : (char)(imm) + ((char)(imm)>0xE ? 33 : 17), \ - ((char)(imm)&0xF0) ? 50 : (char)(imm) + ((char)(imm)>0xD ? 34 : 18), \ - ((char)(imm)&0xF0) ? 51 : (char)(imm) + ((char)(imm)>0xC ? 35 : 19), \ - ((char)(imm)&0xF0) ? 52 : (char)(imm) + ((char)(imm)>0xB ? 36 : 20), \ - ((char)(imm)&0xF0) ? 53 : (char)(imm) + ((char)(imm)>0xA ? 37 : 21), \ - ((char)(imm)&0xF0) ? 54 : (char)(imm) + ((char)(imm)>0x9 ? 38 : 22), \ - ((char)(imm)&0xF0) ? 55 : (char)(imm) + ((char)(imm)>0x8 ? 39 : 23), \ - ((char)(imm)&0xF0) ? 56 : (char)(imm) + ((char)(imm)>0x7 ? 40 : 24), \ - ((char)(imm)&0xF0) ? 57 : (char)(imm) + ((char)(imm)>0x6 ? 41 : 25), \ - ((char)(imm)&0xF0) ? 58 : (char)(imm) + ((char)(imm)>0x5 ? 42 : 26), \ - ((char)(imm)&0xF0) ? 59 : (char)(imm) + ((char)(imm)>0x4 ? 43 : 27), \ - ((char)(imm)&0xF0) ? 60 : (char)(imm) + ((char)(imm)>0x3 ? 44 : 28), \ - ((char)(imm)&0xF0) ? 61 : (char)(imm) + ((char)(imm)>0x2 ? 45 : 29), \ - ((char)(imm)&0xF0) ? 62 : (char)(imm) + ((char)(imm)>0x1 ? 46 : 30), \ - ((char)(imm)&0xF0) ? 63 : (char)(imm) + ((char)(imm)>0x0 ? 47 : 31)); }) - -#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) - -static __inline__ __m256i __DEFAULT_FN_ATTRS +#define _mm256_srli_si256(a, imm) \ + (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)) + +#define _mm256_bsrli_epi128(a, imm) \ + (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psrlq256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); } -static __inline__ __m256i 
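`_mm256_slli_si256` / `_mm256_bslli_epi128` above collapse from the large conditional-shuffle macros into the `__builtin_ia32_pslldqi256_byteshift` builtin; the semantics stay the same: a byte shift performed independently within each 128-bit lane, with vacated bytes zero-filled. A small sketch (illustrative; assumes `-mavx2`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  unsigned char in[32], out[32];
  for (int i = 0; i < 32; ++i) in[i] = (unsigned char)i;

  __m256i v = _mm256_loadu_si256((const __m256i *)in);
  // Shift left by 4 bytes in each 128-bit lane; the low four
  // bytes of each lane read back as zero.
  __m256i r = _mm256_slli_si256(v, 4);
  _mm256_storeu_si256((__m256i *)out, r);

  for (int i = 0; i < 32; ++i) printf("%02x ", out[i]);
  putchar('\n');
  return 0;
}
```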
__DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_stream_load_si256(__m256i const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X) { return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_broadcastsd_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X) { return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X) { return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastsi128_si256(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); } -#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \ - (__v4si)(__m128i)(V2), \ - (((M) & 0x01) ? 4 : 0), \ - (((M) & 0x02) ? 5 : 1), \ - (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 7 : 3)); }) - -#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \ - (__v8si)(__m256i)(V2), \ - (((M) & 0x01) ? 8 : 0), \ - (((M) & 0x02) ? 9 : 1), \ - (((M) & 0x04) ? 10 : 2), \ - (((M) & 0x08) ? 11 : 3), \ - (((M) & 0x10) ? 12 : 4), \ - (((M) & 0x20) ? 13 : 5), \ - (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 
15 : 7)); }) - -static __inline__ __m256i __DEFAULT_FN_ATTRS +#define _mm_blend_epi32(V1, V2, M) \ + (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \ + (__v4si)(__m128i)(V2), (int)(M)) + +#define _mm256_blend_epi32(V1, V2, M) \ + (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \ + (__v8si)(__m256i)(V2), (int)(M)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastb_epi8(__m128i __X) { return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastw_epi16(__m128i __X) { return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X) { return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastb_epi8(__m128i __X) { return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastw_epi16(__m128i __X) { return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X) { return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X) { return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); } -#define _mm256_permute4x64_pd(V, M) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \ - (__v4df)_mm256_undefined_pd(), \ - ((M) >> 0) & 0x3, \ - ((M) >> 2) & 0x3, \ - ((M) >> 4) & 0x3, \ - ((M) >> 6) & 0x3); }) +#define _mm256_permute4x64_pd(V, M) \ + (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); } -#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \ - (__v4di)_mm256_undefined_si256(), \ - ((M) >> 0) & 0x3, \ - ((M) >> 2) & 0x3, \ - ((M) >> 4) & 0x3, \ - ((M) >> 6) & 0x3); }) - -#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ - (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); }) - -#define _mm256_extracti128_si256(V, M) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \ - (__v4di)_mm256_undefined_si256(), \ - (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 
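The blend macros above now call the `pblendd` builtins directly; each bit of the immediate selects the corresponding dword from the second operand (bit set) or the first (bit clear). A small sketch (illustrative, not part of the diff; assumes `-mavx2`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128i a = _mm_setr_epi32(10, 11, 12, 13);
  __m128i b = _mm_setr_epi32(20, 21, 22, 23);
  __m128i r = _mm_blend_epi32(a, b, 0x5);  // lanes 0 and 2 from b

  int out[4];
  _mm_storeu_si128((__m128i *)out, r);
  for (int i = 0; i < 4; ++i) printf("%d ", out[i]);  // 20 11 22 13
  putchar('\n');
  return 0;
}
```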
3 : 1) ); }) - -#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V1), \ - (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ - (((M) & 1) ? 0 : 4), \ - (((M) & 1) ? 1 : 5), \ - (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) ); }) - -static __inline__ __m256i __DEFAULT_FN_ATTRS +#define _mm256_permute4x64_epi64(V, M) \ + (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M)) + +#define _mm256_permute2x128_si256(V1, V2, M) \ + (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M)) + +#define _mm256_extracti128_si256(V, M) \ + (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M)) + +#define _mm256_inserti128_si256(V1, V2, M) \ + (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \ + (__v2di)(__m128i)(V2), (int)(M)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi32(int const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi64(long long const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi32(int const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi64(long long const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 
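The masked load/store intrinsics above keep their shapes and only pick up the width-suffixed attribute macros. As a reminder of their semantics, a usage sketch (illustrative; assumes `-mavx2`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  int src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  int dst[8] = {0};

  // Only lanes whose mask element has the sign bit set participate.
  __m256i mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
  __m256i v = _mm256_maskload_epi32(src, mask);   // inactive lanes read 0
  _mm256_maskstore_epi32(dst, mask, v);           // inactive lanes untouched

  for (int i = 0; i < 8; ++i) printf("%d ", dst[i]);  // 1 0 3 0 5 0 7 0
  putchar('\n');
  return 0;
}
```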
_mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); } -#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i32gather_pd(a, m, i, mask, s) \ (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ - (__v2df)(__m128d)(mask), (s)); }) + (__v2df)(__m128d)(mask), (s)) -#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \ (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4df)(__m256d)(mask), (s)); }) + (__v4df)(__m256d)(mask), (s)) -#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i64gather_pd(a, m, i, mask, s) \ (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \ (double const *)(m), \ (__v2di)(__m128i)(i), \ - (__v2df)(__m128d)(mask), (s)); }) + (__v2df)(__m128d)(mask), (s)) -#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \ (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \ (double const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4df)(__m256d)(mask), (s)); }) + (__v4df)(__m256d)(mask), (s)) -#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i32gather_ps(a, m, i, mask, s) \ (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4sf)(__m128)(mask), (s)); }) + (__v4sf)(__m128)(mask), (s)) -#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \ (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \ (float const *)(m), \ (__v8si)(__m256i)(i), \ - (__v8sf)(__m256)(mask), (s)); }) + (__v8sf)(__m256)(mask), (s)) -#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i64gather_ps(a, m, i, mask, s) \ (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v2di)(__m128i)(i), \ - (__v4sf)(__m128)(mask), (s)); }) + (__v4sf)(__m128)(mask), (s)) -#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \ (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4sf)(__m128)(mask), (s)); }) + (__v4sf)(__m128)(mask), (s)) -#define _mm_mask_i32gather_epi32(a, 
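`_mm256_sllv_epi32` and the other variable-shift intrinsics above shift each lane by the count held in the corresponding lane of the second operand (counts at or beyond the element width produce zero, or sign fill for `srav`). A usage sketch (illustrative; assumes `-mavx2`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i v = _mm256_set1_epi32(1);
  __m256i counts = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
  __m256i r = _mm256_sllv_epi32(v, counts);  // per-lane left shift

  int out[8];
  _mm256_storeu_si256((__m256i *)out, r);
  for (int i = 0; i < 8; ++i) printf("%d ", out[i]);  // 1 2 4 8 16 32 64 128
  putchar('\n');
  return 0;
}
```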
m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4si)(__m128i)(mask), (s)); }) + (__v4si)(__m128i)(mask), (s)) -#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \ (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \ (int const *)(m), \ (__v8si)(__m256i)(i), \ - (__v8si)(__m256i)(mask), (s)); }) + (__v8si)(__m256i)(mask), (s)) -#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v2di)(__m128i)(i), \ - (__v4si)(__m128i)(mask), (s)); }) + (__v4si)(__m128i)(mask), (s)) -#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4si)(__m128i)(mask), (s)); }) + (__v4si)(__m128i)(mask), (s)) -#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ - (__v2di)(__m128i)(mask), (s)); }) + (__v2di)(__m128i)(mask), (s)) -#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \ (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4di)(__m256i)(mask), (s)); }) + (__v4di)(__m256i)(mask), (s)) -#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \ (long long const *)(m), \ (__v2di)(__m128i)(i), \ - (__v2di)(__m128i)(mask), (s)); }) + (__v2di)(__m128i)(mask), (s)) -#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \ (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \ (long long const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4di)(__m256i)(mask), (s)); }) + (__v4di)(__m256i)(mask), (s)) -#define _mm_i32gather_pd(m, i, s) __extension__ ({ \ +#define _mm_i32gather_pd(m, i, s) \ (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \ _mm_setzero_pd()), \ - (s)); }) + (s)) -#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \ +#define _mm256_i32gather_pd(m, i, s) \ (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \ _mm256_setzero_pd(), \ _CMP_EQ_OQ), \ - (s)); }) + (s)) -#define _mm_i64gather_pd(m, i, s) __extension__ ({ \ +#define _mm_i64gather_pd(m, i, s) \ (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \ (double const *)(m), \ (__v2di)(__m128i)(i), \ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \ _mm_setzero_pd()), \ - (s)); }) + (s)) -#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \ +#define _mm256_i64gather_pd(m, i, s) \ (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \ (double const *)(m), \ (__v4di)(__m256i)(i), \ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \ _mm256_setzero_pd(), \ 
_CMP_EQ_OQ), \ - (s)); }) + (s)) -#define _mm_i32gather_ps(m, i, s) __extension__ ({ \ +#define _mm_i32gather_ps(m, i, s) \ (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v4si)(__m128i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ - (s)); }) + (s)) -#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \ +#define _mm256_i32gather_ps(m, i, s) \ (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \ (float const *)(m), \ (__v8si)(__m256i)(i), \ (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \ _mm256_setzero_ps(), \ _CMP_EQ_OQ), \ - (s)); }) + (s)) -#define _mm_i64gather_ps(m, i, s) __extension__ ({ \ +#define _mm_i64gather_ps(m, i, s) \ (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v2di)(__m128i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ - (s)); }) + (s)) -#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \ +#define _mm256_i64gather_ps(m, i, s) \ (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v4di)(__m256i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ - (s)); }) + (s)) -#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \ +#define _mm_i32gather_epi32(m, i, s) \ (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v4si)(__m128i)(i), \ - (__v4si)_mm_set1_epi32(-1), (s)); }) + (__v4si)_mm_set1_epi32(-1), (s)) -#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \ +#define _mm256_i32gather_epi32(m, i, s) \ (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \ (int const *)(m), (__v8si)(__m256i)(i), \ - (__v8si)_mm256_set1_epi32(-1), (s)); }) + (__v8si)_mm256_set1_epi32(-1), (s)) -#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \ +#define _mm_i64gather_epi32(m, i, s) \ (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v2di)(__m128i)(i), \ - (__v4si)_mm_set1_epi32(-1), (s)); }) + (__v4si)_mm_set1_epi32(-1), (s)) -#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \ +#define _mm256_i64gather_epi32(m, i, s) \ (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v4di)(__m256i)(i), \ - (__v4si)_mm_set1_epi32(-1), (s)); }) + (__v4si)_mm_set1_epi32(-1), (s)) -#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \ +#define _mm_i32gather_epi64(m, i, s) \ (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ - (__v2di)_mm_set1_epi64x(-1), (s)); }) + (__v2di)_mm_set1_epi64x(-1), (s)) -#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \ +#define _mm256_i32gather_epi64(m, i, s) \ (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4di)_mm256_set1_epi64x(-1), (s)); }) + (__v4di)_mm256_set1_epi64x(-1), (s)) -#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ +#define _mm_i64gather_epi64(m, i, s) \ (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \ (long long const *)(m), \ (__v2di)(__m128i)(i), \ - (__v2di)_mm_set1_epi64x(-1), (s)); }) + (__v2di)_mm_set1_epi64x(-1), (s)) -#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ +#define _mm256_i64gather_epi64(m, i, s) \ (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \ (long long const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4di)_mm256_set1_epi64x(-1), (s)); }) + (__v4di)_mm256_set1_epi64x(-1), (s)) -#undef 
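The gather macros above lose their statement-expression wrappers but keep the same operands: destination/source, base pointer, index vector, mask, and an immediate scale (1, 2, 4, or 8). A usage sketch for the unmasked form (illustrative; assumes `-mavx2`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  int table[16];
  for (int i = 0; i < 16; ++i) table[i] = 100 + i;

  // Gather table[0], table[2], ..., table[14]; scale = sizeof(int).
  __m256i idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
  __m256i g = _mm256_i32gather_epi32(table, idx, 4);

  int out[8];
  _mm256_storeu_si256((__m256i *)out, g);
  for (int i = 0; i < 8; ++i) printf("%d ", out[i]);  // 100 102 ... 114
  putchar('\n');
  return 0;
}
```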
__DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128
 
 #endif /* __AVX2INTRIN_H */
diff --git a/lib/clang/7.0.0/include/avx512bitalgintrin.h b/lib/clang/8.0.0/include/avx512bitalgintrin.h
similarity index 93%
rename from lib/clang/7.0.0/include/avx512bitalgintrin.h
rename to lib/clang/8.0.0/include/avx512bitalgintrin.h
index 2dd1471..56046f8 100644
--- a/lib/clang/7.0.0/include/avx512bitalgintrin.h
+++ b/lib/clang/8.0.0/include/avx512bitalgintrin.h
@@ -29,7 +29,7 @@
 #define __AVX512BITALGINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_popcnt_epi16(__m512i __A)
@@ -48,7 +48,7 @@ _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
 {
-  return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(),
+  return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(),
                                   __U, __B);
 }
 
@@ -70,7 +70,7 @@ _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
 {
-  return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(),
+  return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(),
                                  __U, __B);
 }
 
diff --git a/lib/clang/7.0.0/include/avx512bwintrin.h b/lib/clang/8.0.0/include/avx512bwintrin.h
similarity index 53%
rename from lib/clang/7.0.0/include/avx512bwintrin.h
rename to lib/clang/8.0.0/include/avx512bwintrin.h
index f5ff5d3..bff5b97 100644
--- a/lib/clang/7.0.0/include/avx512bwintrin.h
+++ b/lib/clang/8.0.0/include/avx512bwintrin.h
@@ -32,69 +32,258 @@ typedef unsigned int __mmask32;
 typedef unsigned long long __mmask64;
 
 /* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) -static __inline __m512i __DEFAULT_FN_ATTRS -_mm512_setzero_qi(void) { - return (__m512i)(__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; +static __inline __mmask32 __DEFAULT_FN_ATTRS +_knot_mask32(__mmask32 __M) +{ + return __builtin_ia32_knotsi(__M); +} + +static __inline __mmask64 __DEFAULT_FN_ATTRS +_knot_mask64(__mmask64 __M) +{ + return __builtin_ia32_knotdi(__M); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kand_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kand_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kandn_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kandn_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kor_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kor_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kxnor_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kxnor_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kxor_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kxor_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) +{ + return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) +{ + return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); + return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) +{ + return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) +{ + return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortest_mask64_u8(__mmask64 
__A, __mmask64 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); + return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) +{ + return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) +{ + return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); + return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) +{ + return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) +{ + return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); } -static __inline __m512i __DEFAULT_FN_ATTRS -_mm512_setzero_hi(void) { - return (__m512i)(__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); + return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kadd_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kadd_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); +} + +#define _kshiftli_mask32(A, I) \ + (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) + +#define _kshiftri_mask32(A, I) \ + (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) + +#define _kshiftli_mask64(A, I) \ + (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) + +#define _kshiftri_mask64(A, I) \ + (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_cvtmask32_u32(__mmask32 __A) { + return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_cvtmask64_u64(__mmask64 __A) { + return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_cvtu32_mask32(unsigned int __A) { + return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_cvtu64_mask64(unsigned long long __A) { + return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_load_mask32(__mmask32 *__A) { + return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_load_mask64(__mmask64 *__A) { + return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_store_mask32(__mmask32 *__A, __mmask32 __B) { + *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_store_mask64(__mmask64 *__A, __mmask64 __B) { + *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); } /* Integer compare 
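The avx512bw header gains the 32- and 64-bit opmask helpers shown above (`_knot_mask32`, `_kand_mask64`, `_kshiftli_mask32`, `_cvtu32_mask32`, and friends); the mask types are thin wrappers around plain integers. A minimal sketch of how they compose (illustrative; compiles with `-mavx512bw`, executes only on AVX-512BW hardware):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __mmask32 a = _cvtu32_mask32(0x0000F0F0u);
  __mmask32 b = _cvtu32_mask32(0x0000FF00u);

  __mmask32 and_ab = _kand_mask32(a, b);      // 0x0000F000
  __mmask32 not_a  = _knot_mask32(a);         // 0xFFFF0F0F
  __mmask32 shl    = _kshiftli_mask32(b, 4);  // 0x000FF000

  printf("%08x %08x %08x\n", _cvtmask32_u32(and_ab),
         _cvtmask32_u32(not_a), _cvtmask32_u32(shl));
  return 0;
}
```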
*/ -#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epi8_mask(a, b, p) \ (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)-1); }) + (__mmask64)-1) -#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)(m)); }) + (__mmask64)(m)) -#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epu8_mask(a, b, p) \ (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)-1); }) + (__mmask64)-1) -#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)(m)); }) + (__mmask64)(m)) -#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epi16_mask(a, b, p) \ (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)(m)); }) + (__mmask32)(m)) -#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epu16_mask(a, b, p) \ (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)(m)); }) + (__mmask32)(m)) #define _mm512_cmpeq_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) @@ -196,102 +385,102 @@ _mm512_setzero_hi(void) { #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A - (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static 
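The comparison macros above produce one mask bit per lane (`__mmask64` for bytes, `__mmask32` for words). A small sketch (illustrative; assumes `-mavx512bw` and AVX-512BW hardware at runtime):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512i a = _mm512_set1_epi8(5);
  __m512i b = _mm512_set1_epi8(7);

  // 5 < 7 in every byte lane, so all 64 mask bits come back set.
  __mmask64 lt = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_LT);
  printf("%016llx\n", (unsigned long long)lt);
  return 0;
}
```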
__inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A + (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A - (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A * (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, @@ -299,7 +488,7 @@ _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) (__v64qi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, @@ -307,69 +496,65 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) (__v32hi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi8 (__m512i __A) { - return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, - 
(__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) { - return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_abs_epi8(__A), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) { - return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_abs_epi8(__A), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi16 (__m512i __A) { - return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) { - return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_abs_epi16(__A), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) { - return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_abs_epi16(__A), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_packs_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packs_epi32(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, @@ -377,13 +562,13 @@ _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_packs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, @@ -391,29 +576,29 @@ _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_packs_epi16(__mmask64 
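The masked forms above are refactored from dedicated masked builtins (`pabsb512_mask`, etc.) into the unmasked operation followed by a per-lane select (`__builtin_ia32_selectb_512` / `selectw_512`). A scalar reference model of that select step (my sketch of the assumed semantics, not the header's actual code):

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// result[i] = mask bit i set ? op_result[i] : fallback[i]
// (fallback is __W for merge-masking, all-zeros for zero-masking).
static void mask_select_epi16(int16_t *dst, const int16_t *op_result,
                              const int16_t *fallback, uint32_t mask, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = ((mask >> i) & 1u) ? op_result[i] : fallback[i];
}

int main(void) {
  int16_t a[4] = {-3, 7, -9, 2}, absa[4], zero[4] = {0}, out[4];
  for (int i = 0; i < 4; ++i) absa[i] = (int16_t)abs(a[i]);

  // Zero-masking on 4 lanes, analogous to _mm512_maskz_abs_epi16(0x5, a):
  mask_select_epi16(out, absa, zero, 0x5u, 4);
  for (int i = 0; i < 4; ++i) printf("%d ", out[i]);  // 3 0 9 0
  putchar('\n');
  return 0;
}
```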
__M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_packs_epi16(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_packus_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packus_epi32(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, @@ -421,13 +606,13 @@ _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_packus_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, @@ -435,127 +620,103 @@ _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_packus_epi16(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epi8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_adds_epi8(__A, __B), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_adds_epi8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - 
(__mmask32) -1); + return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_adds_epi16(__A, __B), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_adds_epi16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epu8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_adds_epu8(__A, __B), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_adds_epu8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epu16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_adds_epu16(__A, __B), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_adds_epu16(__A, 
__B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_avg_epu8 (__m512i __A, __m512i __B) { typedef unsigned short __v64hu __attribute__((__vector_size__(128))); @@ -565,7 +726,7 @@ _mm512_avg_epu8 (__m512i __A, __m512i __B) >> 1, __v64qu); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { @@ -574,15 +735,15 @@ _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_avg_epu16 (__m512i __A, __m512i __B) { typedef unsigned int __v32su __attribute__((__vector_size__(128))); @@ -592,7 +753,7 @@ _mm512_avg_epu16 (__m512i __A, __m512i __B) >> 1, __v32hu); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { @@ -601,245 +762,198 @@ _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), - (__v32hi) _mm512_setzero_hi()); + (__v32hi) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_max_epi8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_max_epi8(__A, __B), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_max_epi16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_max_epi16(__A, __B), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_max_epu8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_max_epu8(__A, __B), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_max_epu16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_max_epu16(__A, __B), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, - (__v64qi) __B, - 
(__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_min_epi8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_min_epi8(__A, __B), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_min_epi16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_min_epi16(__A, __B), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_min_epu8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + 
(__v64qi)_mm512_min_epu8(__A, __B), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_min_epu16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_min_epu16(__A, __B), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_shuffle_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, @@ -847,173 +961,143 @@ _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epi8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_subs_epi8(__A, __B), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_subs_epi8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i 
__DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_subs_epi16(__A, __B), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_subs_epi16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epu8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_subs_epu8(__A, __B), + (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_subs_epu8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epu16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_subs_epu16(__A, __B), + (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { - 
return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_subs_epu16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask2_permutex2var_epi16 (__m512i __A, __m512i __I, - __mmask32 __U, __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) { - return (__m512i) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A, - (__v32hi) __I /* idx */ , - (__v32hi) __B, - (__mmask32) __U); + return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, + (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_permutex2var_epi16 (__m512i __A, __m512i __I, __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I /* idx */, - (__v32hi) __A, - (__v32hi) __B, - (__mmask32) -1); + return (__m512i)__builtin_ia32_selectw_512(__U, + (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), + (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_permutex2var_epi16 (__m512i __A, __mmask32 __U, - __m512i __I, __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I /* idx */, - (__v32hi) __A, - (__v32hi) __B, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512(__U, + (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), + (__v32hi)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A, - __m512i __I, __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varhi512_maskz ((__v32hi) __I - /* idx */ , - (__v32hi) __A, - (__v32hi) __B, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512(__U, + (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mulhrs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1021,21 +1105,21 @@ _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mulhi_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { @@ -1044,21 +1128,21 @@ _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mulhi_epu16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1066,20 +1150,20 @@ _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, @@ -1087,114 +1171,114 @@ _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, (__v32hi)_mm512_maddubs_epi16(__X, __Y), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_madd_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_madd_epi16(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_madd_epi16(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi16_epi8 (__m512i __A) { return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, (__v32qi)_mm256_setzero_si256(), (__mmask32) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i 
__A) { return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, (__v32qi)__O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, (__v32qi) _mm256_setzero_si256(), __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi16_epi8 (__m512i __A) { return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, (__v32qi) _mm256_setzero_si256(), (__mmask32) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, (__v32qi) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, (__v32qi) _mm256_setzero_si256(), __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi8 (__m512i __A) { return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, - (__v32qi) _mm256_setzero_si256(), + (__v32qi) _mm256_undefined_si256(), (__mmask32) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, (__v32qi) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, (__v32qi) _mm256_setzero_si256(), __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) { __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) { __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) { __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 8, 64+8, 9, 64+9, @@ -1215,21 +1299,21 @@ _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 62, 64+62, 63, 64+63); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpackhi_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpackhi_epi8(__A, __B), - 
(__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 4, 32+4, 5, 32+5, @@ -1242,21 +1326,21 @@ _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 30, 32+30, 31, 32+31); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpackhi_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpackhi_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 0, 64+0, 1, 64+1, @@ -1277,21 +1361,21 @@ _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 54, 64+54, 55, 64+55); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpacklo_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpacklo_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 0, 32+0, 1, 32+1, @@ -1304,21 +1388,21 @@ _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 26, 32+26, 27, 32+27); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpacklo_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpacklo_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi16(__m256i __A) { /* This function always performs a signed extension, but __v32qi is a char @@ -1326,7 +1410,7 @@ _mm512_cvtepi8_epi16(__m256i __A) return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1334,21 +1418,21 @@ 
_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepi8_epi16(__A), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi16(__m256i __A) { return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1356,96 +1440,54 @@ _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepu8_epi16(__A), - (__v32hi)_mm512_setzero_hi()); -} - - -#define _mm512_shufflehi_epi16(A, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ - (__v32hi)_mm512_undefined_epi32(), \ - 0, 1, 2, 3, \ - 4 + (((imm) >> 0) & 0x3), \ - 4 + (((imm) >> 2) & 0x3), \ - 4 + (((imm) >> 4) & 0x3), \ - 4 + (((imm) >> 6) & 0x3), \ - 8, 9, 10, 11, \ - 12 + (((imm) >> 0) & 0x3), \ - 12 + (((imm) >> 2) & 0x3), \ - 12 + (((imm) >> 4) & 0x3), \ - 12 + (((imm) >> 6) & 0x3), \ - 16, 17, 18, 19, \ - 20 + (((imm) >> 0) & 0x3), \ - 20 + (((imm) >> 2) & 0x3), \ - 20 + (((imm) >> 4) & 0x3), \ - 20 + (((imm) >> 6) & 0x3), \ - 24, 25, 26, 27, \ - 28 + (((imm) >> 0) & 0x3), \ - 28 + (((imm) >> 2) & 0x3), \ - 28 + (((imm) >> 4) & 0x3), \ - 28 + (((imm) >> 6) & 0x3)); }) - -#define _mm512_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ + (__v32hi)_mm512_setzero_si512()); +} + + +#define _mm512_shufflehi_epi16(A, imm) \ + (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) + +#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflehi_epi16((A), \ (imm)), \ - (__v32hi)(__m512i)(W)); }) + (__v32hi)(__m512i)(W)) -#define _mm512_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_shufflehi_epi16(U, A, imm) \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflehi_epi16((A), \ (imm)), \ - (__v32hi)_mm512_setzero_hi()); }) - -#define _mm512_shufflelo_epi16(A, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ - (__v32hi)_mm512_undefined_epi32(), \ - 0 + (((imm) >> 0) & 0x3), \ - 0 + (((imm) >> 2) & 0x3), \ - 0 + (((imm) >> 4) & 0x3), \ - 0 + (((imm) >> 6) & 0x3), \ - 4, 5, 6, 7, \ - 8 + (((imm) >> 0) & 0x3), \ - 8 + (((imm) >> 2) & 0x3), \ - 8 + (((imm) >> 4) & 0x3), \ - 8 + (((imm) >> 6) & 0x3), \ - 12, 13, 14, 15, \ - 16 + (((imm) >> 0) & 0x3), \ - 16 + (((imm) >> 2) & 0x3), \ - 16 + (((imm) >> 4) & 0x3), \ - 16 + (((imm) >> 6) & 0x3), \ - 20, 21, 22, 23, \ - 24 + (((imm) >> 0) & 0x3), \ - 24 + (((imm) >> 2) & 0x3), \ - 24 + (((imm) >> 4) & 0x3), \ - 24 + (((imm) >> 6) & 0x3), \ - 28, 29, 30, 31); }) - - -#define _mm512_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ + (__v32hi)_mm512_setzero_si512()) + +#define _mm512_shufflelo_epi16(A, imm) \ + 
(__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) + + +#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflelo_epi16((A), \ (imm)), \ - (__v32hi)(__m512i)(W)); }) + (__v32hi)(__m512i)(W)) -#define _mm512_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_shufflelo_epi16(U, A, imm) \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflelo_epi16((A), \ (imm)), \ - (__v32hi)_mm512_setzero_hi()); }) + (__v32hi)_mm512_setzero_si512()) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1453,21 +1495,21 @@ _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sllv_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1475,21 +1517,21 @@ _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sll_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi16(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1497,90 +1539,24 @@ _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_slli_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); -} - -#define _mm512_bslli_epi128(a, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector( \ - (__v64qi)_mm512_setzero_si512(), \ - (__v64qi)(__m512i)(a), \ - ((char)(imm)&0xF0) ? 0 : ((char)(imm)>0x0 ? 16 : 64) - (char)(imm), \ - ((char)(imm)&0xF0) ? 1 : ((char)(imm)>0x1 ? 17 : 65) - (char)(imm), \ - ((char)(imm)&0xF0) ? 2 : ((char)(imm)>0x2 ? 
18 : 66) - (char)(imm), \ - ((char)(imm)&0xF0) ? 3 : ((char)(imm)>0x3 ? 19 : 67) - (char)(imm), \ - ((char)(imm)&0xF0) ? 4 : ((char)(imm)>0x4 ? 20 : 68) - (char)(imm), \ - ((char)(imm)&0xF0) ? 5 : ((char)(imm)>0x5 ? 21 : 69) - (char)(imm), \ - ((char)(imm)&0xF0) ? 6 : ((char)(imm)>0x6 ? 22 : 70) - (char)(imm), \ - ((char)(imm)&0xF0) ? 7 : ((char)(imm)>0x7 ? 23 : 71) - (char)(imm), \ - ((char)(imm)&0xF0) ? 8 : ((char)(imm)>0x8 ? 24 : 72) - (char)(imm), \ - ((char)(imm)&0xF0) ? 9 : ((char)(imm)>0x9 ? 25 : 73) - (char)(imm), \ - ((char)(imm)&0xF0) ? 10 : ((char)(imm)>0xA ? 26 : 74) - (char)(imm), \ - ((char)(imm)&0xF0) ? 11 : ((char)(imm)>0xB ? 27 : 75) - (char)(imm), \ - ((char)(imm)&0xF0) ? 12 : ((char)(imm)>0xC ? 28 : 76) - (char)(imm), \ - ((char)(imm)&0xF0) ? 13 : ((char)(imm)>0xD ? 29 : 77) - (char)(imm), \ - ((char)(imm)&0xF0) ? 14 : ((char)(imm)>0xE ? 30 : 78) - (char)(imm), \ - ((char)(imm)&0xF0) ? 15 : ((char)(imm)>0xF ? 31 : 79) - (char)(imm), \ - ((char)(imm)&0xF0) ? 16 : ((char)(imm)>0x0 ? 32 : 80) - (char)(imm), \ - ((char)(imm)&0xF0) ? 17 : ((char)(imm)>0x1 ? 33 : 81) - (char)(imm), \ - ((char)(imm)&0xF0) ? 18 : ((char)(imm)>0x2 ? 34 : 82) - (char)(imm), \ - ((char)(imm)&0xF0) ? 19 : ((char)(imm)>0x3 ? 35 : 83) - (char)(imm), \ - ((char)(imm)&0xF0) ? 20 : ((char)(imm)>0x4 ? 36 : 84) - (char)(imm), \ - ((char)(imm)&0xF0) ? 21 : ((char)(imm)>0x5 ? 37 : 85) - (char)(imm), \ - ((char)(imm)&0xF0) ? 22 : ((char)(imm)>0x6 ? 38 : 86) - (char)(imm), \ - ((char)(imm)&0xF0) ? 23 : ((char)(imm)>0x7 ? 39 : 87) - (char)(imm), \ - ((char)(imm)&0xF0) ? 24 : ((char)(imm)>0x8 ? 40 : 88) - (char)(imm), \ - ((char)(imm)&0xF0) ? 25 : ((char)(imm)>0x9 ? 41 : 89) - (char)(imm), \ - ((char)(imm)&0xF0) ? 26 : ((char)(imm)>0xA ? 42 : 90) - (char)(imm), \ - ((char)(imm)&0xF0) ? 27 : ((char)(imm)>0xB ? 43 : 91) - (char)(imm), \ - ((char)(imm)&0xF0) ? 28 : ((char)(imm)>0xC ? 44 : 92) - (char)(imm), \ - ((char)(imm)&0xF0) ? 29 : ((char)(imm)>0xD ? 45 : 93) - (char)(imm), \ - ((char)(imm)&0xF0) ? 30 : ((char)(imm)>0xE ? 46 : 94) - (char)(imm), \ - ((char)(imm)&0xF0) ? 31 : ((char)(imm)>0xF ? 47 : 95) - (char)(imm), \ - ((char)(imm)&0xF0) ? 32 : ((char)(imm)>0x0 ? 48 : 96) - (char)(imm), \ - ((char)(imm)&0xF0) ? 33 : ((char)(imm)>0x1 ? 49 : 97) - (char)(imm), \ - ((char)(imm)&0xF0) ? 34 : ((char)(imm)>0x2 ? 50 : 98) - (char)(imm), \ - ((char)(imm)&0xF0) ? 35 : ((char)(imm)>0x3 ? 51 : 99) - (char)(imm), \ - ((char)(imm)&0xF0) ? 36 : ((char)(imm)>0x4 ? 52 : 100) - (char)(imm), \ - ((char)(imm)&0xF0) ? 37 : ((char)(imm)>0x5 ? 53 : 101) - (char)(imm), \ - ((char)(imm)&0xF0) ? 38 : ((char)(imm)>0x6 ? 54 : 102) - (char)(imm), \ - ((char)(imm)&0xF0) ? 39 : ((char)(imm)>0x7 ? 55 : 103) - (char)(imm), \ - ((char)(imm)&0xF0) ? 40 : ((char)(imm)>0x8 ? 56 : 104) - (char)(imm), \ - ((char)(imm)&0xF0) ? 41 : ((char)(imm)>0x9 ? 57 : 105) - (char)(imm), \ - ((char)(imm)&0xF0) ? 42 : ((char)(imm)>0xA ? 58 : 106) - (char)(imm), \ - ((char)(imm)&0xF0) ? 43 : ((char)(imm)>0xB ? 59 : 107) - (char)(imm), \ - ((char)(imm)&0xF0) ? 44 : ((char)(imm)>0xC ? 60 : 108) - (char)(imm), \ - ((char)(imm)&0xF0) ? 45 : ((char)(imm)>0xD ? 61 : 109) - (char)(imm), \ - ((char)(imm)&0xF0) ? 46 : ((char)(imm)>0xE ? 62 : 110) - (char)(imm), \ - ((char)(imm)&0xF0) ? 47 : ((char)(imm)>0xF ? 63 : 111) - (char)(imm), \ - ((char)(imm)&0xF0) ? 48 : ((char)(imm)>0x0 ? 64 : 112) - (char)(imm), \ - ((char)(imm)&0xF0) ? 49 : ((char)(imm)>0x1 ? 65 : 113) - (char)(imm), \ - ((char)(imm)&0xF0) ? 50 : ((char)(imm)>0x2 ? 
66 : 114) - (char)(imm), \ - ((char)(imm)&0xF0) ? 51 : ((char)(imm)>0x3 ? 67 : 115) - (char)(imm), \ - ((char)(imm)&0xF0) ? 52 : ((char)(imm)>0x4 ? 68 : 116) - (char)(imm), \ - ((char)(imm)&0xF0) ? 53 : ((char)(imm)>0x5 ? 69 : 117) - (char)(imm), \ - ((char)(imm)&0xF0) ? 54 : ((char)(imm)>0x6 ? 70 : 118) - (char)(imm), \ - ((char)(imm)&0xF0) ? 55 : ((char)(imm)>0x7 ? 71 : 119) - (char)(imm), \ - ((char)(imm)&0xF0) ? 56 : ((char)(imm)>0x8 ? 72 : 120) - (char)(imm), \ - ((char)(imm)&0xF0) ? 57 : ((char)(imm)>0x9 ? 73 : 121) - (char)(imm), \ - ((char)(imm)&0xF0) ? 58 : ((char)(imm)>0xA ? 74 : 122) - (char)(imm), \ - ((char)(imm)&0xF0) ? 59 : ((char)(imm)>0xB ? 75 : 123) - (char)(imm), \ - ((char)(imm)&0xF0) ? 60 : ((char)(imm)>0xC ? 76 : 124) - (char)(imm), \ - ((char)(imm)&0xF0) ? 61 : ((char)(imm)>0xD ? 77 : 125) - (char)(imm), \ - ((char)(imm)&0xF0) ? 62 : ((char)(imm)>0xE ? 78 : 126) - (char)(imm), \ - ((char)(imm)&0xF0) ? 63 : ((char)(imm)>0xF ? 79 : 127) - (char)(imm)); }) - -static __inline__ __m512i __DEFAULT_FN_ATTRS + (__v32hi)_mm512_setzero_si512()); +} + +#define _mm512_bslli_epi128(a, imm) \ + (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1588,21 +1564,21 @@ _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srlv_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1610,21 +1586,21 @@ _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srav_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1632,21 +1608,21 @@ _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS 
+static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sra_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi16(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1654,21 +1630,21 @@ _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srai_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1676,21 +1652,21 @@ _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srl_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi16(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1698,84 +1674,18 @@ _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srli_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); -} - -#define _mm512_bsrli_epi128(a, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector( \ - (__v64qi)(__m512i)(a), \ - (__v64qi)_mm512_setzero_si512(), \ - ((char)(imm)&0xF0) ? 64 : (char)(imm) + ((char)(imm)>0xF ? 48 : 0), \ - ((char)(imm)&0xF0) ? 65 : (char)(imm) + ((char)(imm)>0xE ? 49 : 1), \ - ((char)(imm)&0xF0) ? 66 : (char)(imm) + ((char)(imm)>0xD ? 50 : 2), \ - ((char)(imm)&0xF0) ? 67 : (char)(imm) + ((char)(imm)>0xC ? 51 : 3), \ - ((char)(imm)&0xF0) ? 68 : (char)(imm) + ((char)(imm)>0xB ? 52 : 4), \ - ((char)(imm)&0xF0) ? 69 : (char)(imm) + ((char)(imm)>0xA ? 53 : 5), \ - ((char)(imm)&0xF0) ? 70 : (char)(imm) + ((char)(imm)>0x9 ? 
54 : 6), \ - ((char)(imm)&0xF0) ? 71 : (char)(imm) + ((char)(imm)>0x8 ? 55 : 7), \ - ((char)(imm)&0xF0) ? 72 : (char)(imm) + ((char)(imm)>0x7 ? 56 : 8), \ - ((char)(imm)&0xF0) ? 73 : (char)(imm) + ((char)(imm)>0x6 ? 57 : 9), \ - ((char)(imm)&0xF0) ? 74 : (char)(imm) + ((char)(imm)>0x5 ? 58 : 10), \ - ((char)(imm)&0xF0) ? 75 : (char)(imm) + ((char)(imm)>0x4 ? 59 : 11), \ - ((char)(imm)&0xF0) ? 76 : (char)(imm) + ((char)(imm)>0x3 ? 60 : 12), \ - ((char)(imm)&0xF0) ? 77 : (char)(imm) + ((char)(imm)>0x2 ? 61 : 13), \ - ((char)(imm)&0xF0) ? 78 : (char)(imm) + ((char)(imm)>0x1 ? 62 : 14), \ - ((char)(imm)&0xF0) ? 79 : (char)(imm) + ((char)(imm)>0x0 ? 63 : 15), \ - ((char)(imm)&0xF0) ? 80 : (char)(imm) + ((char)(imm)>0xF ? 64 : 16), \ - ((char)(imm)&0xF0) ? 81 : (char)(imm) + ((char)(imm)>0xE ? 65 : 17), \ - ((char)(imm)&0xF0) ? 82 : (char)(imm) + ((char)(imm)>0xD ? 66 : 18), \ - ((char)(imm)&0xF0) ? 83 : (char)(imm) + ((char)(imm)>0xC ? 67 : 19), \ - ((char)(imm)&0xF0) ? 84 : (char)(imm) + ((char)(imm)>0xB ? 68 : 20), \ - ((char)(imm)&0xF0) ? 85 : (char)(imm) + ((char)(imm)>0xA ? 69 : 21), \ - ((char)(imm)&0xF0) ? 86 : (char)(imm) + ((char)(imm)>0x9 ? 70 : 22), \ - ((char)(imm)&0xF0) ? 87 : (char)(imm) + ((char)(imm)>0x8 ? 71 : 23), \ - ((char)(imm)&0xF0) ? 88 : (char)(imm) + ((char)(imm)>0x7 ? 72 : 24), \ - ((char)(imm)&0xF0) ? 89 : (char)(imm) + ((char)(imm)>0x6 ? 73 : 25), \ - ((char)(imm)&0xF0) ? 90 : (char)(imm) + ((char)(imm)>0x5 ? 74 : 26), \ - ((char)(imm)&0xF0) ? 91 : (char)(imm) + ((char)(imm)>0x4 ? 75 : 27), \ - ((char)(imm)&0xF0) ? 92 : (char)(imm) + ((char)(imm)>0x3 ? 76 : 28), \ - ((char)(imm)&0xF0) ? 93 : (char)(imm) + ((char)(imm)>0x2 ? 77 : 29), \ - ((char)(imm)&0xF0) ? 94 : (char)(imm) + ((char)(imm)>0x1 ? 78 : 30), \ - ((char)(imm)&0xF0) ? 95 : (char)(imm) + ((char)(imm)>0x0 ? 79 : 31), \ - ((char)(imm)&0xF0) ? 96 : (char)(imm) + ((char)(imm)>0xF ? 80 : 32), \ - ((char)(imm)&0xF0) ? 97 : (char)(imm) + ((char)(imm)>0xE ? 81 : 33), \ - ((char)(imm)&0xF0) ? 98 : (char)(imm) + ((char)(imm)>0xD ? 82 : 34), \ - ((char)(imm)&0xF0) ? 99 : (char)(imm) + ((char)(imm)>0xC ? 83 : 35), \ - ((char)(imm)&0xF0) ? 100 : (char)(imm) + ((char)(imm)>0xB ? 84 : 36), \ - ((char)(imm)&0xF0) ? 101 : (char)(imm) + ((char)(imm)>0xA ? 85 : 37), \ - ((char)(imm)&0xF0) ? 102 : (char)(imm) + ((char)(imm)>0x9 ? 86 : 38), \ - ((char)(imm)&0xF0) ? 103 : (char)(imm) + ((char)(imm)>0x8 ? 87 : 39), \ - ((char)(imm)&0xF0) ? 104 : (char)(imm) + ((char)(imm)>0x7 ? 88 : 40), \ - ((char)(imm)&0xF0) ? 105 : (char)(imm) + ((char)(imm)>0x6 ? 89 : 41), \ - ((char)(imm)&0xF0) ? 106 : (char)(imm) + ((char)(imm)>0x5 ? 90 : 42), \ - ((char)(imm)&0xF0) ? 107 : (char)(imm) + ((char)(imm)>0x4 ? 91 : 43), \ - ((char)(imm)&0xF0) ? 108 : (char)(imm) + ((char)(imm)>0x3 ? 92 : 44), \ - ((char)(imm)&0xF0) ? 109 : (char)(imm) + ((char)(imm)>0x2 ? 93 : 45), \ - ((char)(imm)&0xF0) ? 110 : (char)(imm) + ((char)(imm)>0x1 ? 94 : 46), \ - ((char)(imm)&0xF0) ? 111 : (char)(imm) + ((char)(imm)>0x0 ? 95 : 47), \ - ((char)(imm)&0xF0) ? 112 : (char)(imm) + ((char)(imm)>0xF ? 96 : 48), \ - ((char)(imm)&0xF0) ? 113 : (char)(imm) + ((char)(imm)>0xE ? 97 : 49), \ - ((char)(imm)&0xF0) ? 114 : (char)(imm) + ((char)(imm)>0xD ? 98 : 50), \ - ((char)(imm)&0xF0) ? 115 : (char)(imm) + ((char)(imm)>0xC ? 99 : 51), \ - ((char)(imm)&0xF0) ? 116 : (char)(imm) + ((char)(imm)>0xB ? 100 : 52), \ - ((char)(imm)&0xF0) ? 117 : (char)(imm) + ((char)(imm)>0xA ? 101 : 53), \ - ((char)(imm)&0xF0) ? 118 : (char)(imm) + ((char)(imm)>0x9 ? 
102 : 54), \ - ((char)(imm)&0xF0) ? 119 : (char)(imm) + ((char)(imm)>0x8 ? 103 : 55), \ - ((char)(imm)&0xF0) ? 120 : (char)(imm) + ((char)(imm)>0x7 ? 104 : 56), \ - ((char)(imm)&0xF0) ? 121 : (char)(imm) + ((char)(imm)>0x6 ? 105 : 57), \ - ((char)(imm)&0xF0) ? 122 : (char)(imm) + ((char)(imm)>0x5 ? 106 : 58), \ - ((char)(imm)&0xF0) ? 123 : (char)(imm) + ((char)(imm)>0x4 ? 107 : 59), \ - ((char)(imm)&0xF0) ? 124 : (char)(imm) + ((char)(imm)>0x3 ? 108 : 60), \ - ((char)(imm)&0xF0) ? 125 : (char)(imm) + ((char)(imm)>0x2 ? 109 : 61), \ - ((char)(imm)&0xF0) ? 126 : (char)(imm) + ((char)(imm)>0x1 ? 110 : 62), \ - ((char)(imm)&0xF0) ? 127 : (char)(imm) + ((char)(imm)>0x0 ? 111 : 63)); }) - -static __inline__ __m512i __DEFAULT_FN_ATTRS + (__v32hi)_mm512_setzero_si512()); +} + +#define _mm512_bsrli_epi128(a, imm) \ + (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, @@ -1783,15 +1693,15 @@ _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) (__v32hi) __W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __A, - (__v32hi) _mm512_setzero_hi ()); + (__v32hi) _mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, @@ -1799,15 +1709,15 @@ _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) (__v64qi) __W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __A, - (__v64qi) _mm512_setzero_hi ()); + (__v64qi) _mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) { return (__m512i) __builtin_ia32_selectb_512(__M, @@ -1815,7 +1725,7 @@ _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) (__v64qi) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) { return (__m512i) __builtin_ia32_selectb_512(__M, @@ -1823,21 +1733,21 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) (__v64qi) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_kunpackd (__mmask64 __A, __mmask64 __B) { return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_kunpackw (__mmask32 __A, __mmask32 __B) { return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, @@ -1845,16 +1755,16 @@ _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) (__mmask32) __U); } -static __inline__ 
__m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, (__v32hi) - _mm512_setzero_hi (), + _mm512_setzero_si512 (), (__mmask32) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, @@ -1862,15 +1772,15 @@ _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) (__mmask64) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, (__v64qi) - _mm512_setzero_hi (), + _mm512_setzero_si512 (), (__mmask64) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) { __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, @@ -1878,7 +1788,7 @@ _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) (__mmask32) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) { __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, @@ -1886,97 +1796,96 @@ _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) (__mmask64) __U); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_test_epi8_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_test_epi16_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_testn_epi8_mask (__m512i __A, __m512i __B) { - return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_qi()); + return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_testn_epi16_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + 
_mm512_setzero_si512()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_movepi8_mask (__m512i __A) { return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_movepi16_mask (__m512i __A) { return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi8 (__mmask64 __A) { return (__m512i) __builtin_ia32_cvtmask2b512 (__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi16 (__mmask32 __A) { return (__m512i) __builtin_ia32_cvtmask2w512 (__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastb_epi8 (__m128i __A) { - return (__m512i)__builtin_shufflevector((__v16qi) __A, - (__v16qi)_mm_undefined_si128(), + return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectb_512(__M, @@ -1984,7 +1893,7 @@ _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) (__v64qi) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectb_512(__M, @@ -1992,7 +1901,7 @@ _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) (__v64qi) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) { return (__m512i) __builtin_ia32_selectw_512(__M, @@ -2000,7 +1909,7 @@ _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) (__v32hi) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) { return (__m512i) __builtin_ia32_selectw_512(__M, @@ -2008,16 +1917,15 @@ _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) (__v32hi) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastw_epi16 (__m128i __A) { - return (__m512i)__builtin_shufflevector((__v8hi) __A, - (__v8hi)_mm_undefined_si128(), + return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectw_512(__M, @@ -2025,7 +1933,7 @@ _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 
(__v32hi) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectw_512(__M, @@ -2033,80 +1941,66 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) (__v32hi) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, - (__v32hi) __A, - (__v32hi) _mm512_undefined_epi32 (), - (__mmask32) -1); + return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, - (__v32hi) __A, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __M); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_permutexvar_epi16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, - (__v32hi) __A, - (__v32hi) __W, - (__mmask32) __M); -} - -#define _mm512_alignr_epi8(A, B, N) __extension__ ({\ - (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N), \ - (__v64qi)_mm512_undefined_pd(), \ - (__mmask64)-1); }) - -#define _mm512_mask_alignr_epi8(W, U, A, B, N) __extension__({\ - (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N), \ - (__v64qi)(__m512i)(W), \ - (__mmask64)(U)); }) - -#define _mm512_maskz_alignr_epi8(U, A, B, N) __extension__({\ - (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N), \ - (__v64qi)_mm512_setzero_si512(), \ - (__mmask64)(U)); }) - -#define _mm512_dbsad_epu8(A, B, imm) __extension__ ({\ - (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(imm), \ - (__v32hi)_mm512_undefined_epi32(), \ - (__mmask32)-1); }) - -#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) ({\ - (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(imm), \ - (__v32hi)(__m512i)(W), \ - (__mmask32)(U)); }) - -#define _mm512_maskz_dbsad_epu8(U, A, B, imm) ({\ - (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(imm), \ - (__v32hi)_mm512_setzero_hi(), \ - (__mmask32)(U)); }) - -static __inline__ __m512i __DEFAULT_FN_ATTRS + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, + (__v32hi)_mm512_permutexvar_epi16(__A, __B), + (__v32hi)__W); +} + +#define _mm512_alignr_epi8(A, B, N) \ + (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(N)) + +#define _mm512_mask_alignr_epi8(W, U, A, B, N) \ + (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ + (__v64qi)(__m512i)(W)) + +#define _mm512_maskz_alignr_epi8(U, A, B, N) \ + (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ + (__v64qi)(__m512i)_mm512_setzero_si512()) + +#define _mm512_dbsad_epu8(A, B, imm) \ + 
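A pattern repeated throughout this hunk: clang 7 carried one `_mask` builtin per operation, while clang 8 composes the unmasked operation with a per-element select (`__builtin_ia32_selectw_512` and friends), so `_mm512_mask_*` keeps inactive lanes from `__W` and `_mm512_maskz_*` zeroes them. A scalar sketch of that select semantics, assuming 32 × 16-bit lanes per `__m512i` (names here are illustrative):

```c
#include <stdint.h>
#include <stdio.h>

#define LANES 32  /* a __m512i holds 32 epi16 lanes */

/* select: lane i takes op[i] where mask bit i is set, else src[i] */
static void select_epi16(uint32_t mask, const int16_t *op,
                         const int16_t *src, int16_t *out) {
    for (int i = 0; i < LANES; i++)
        out[i] = (mask >> i) & 1 ? op[i] : src[i];
}

int main(void) {
    int16_t op[LANES], w[LANES], mask_res[LANES], maskz_res[LANES];
    int16_t zero[LANES] = {0};
    for (int i = 0; i < LANES; i++) { op[i] = (int16_t)(i + 100); w[i] = -1; }

    uint32_t u = 0x0000FFFF;              /* low 16 lanes active */
    select_epi16(u, op, w, mask_res);     /* _mm512_mask_*:  inactive lanes keep W   */
    select_epi16(u, op, zero, maskz_res); /* _mm512_maskz_*: inactive lanes zeroed   */

    printf("lane 0:  mask=%d maskz=%d\n", mask_res[0], maskz_res[0]);
    printf("lane 31: mask=%d maskz=%d\n", mask_res[31], maskz_res[31]);
    return 0;
}
```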
(__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(imm)) + +#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ + (__v32hi)(__m512i)(W)) + +#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ + (__v32hi)_mm512_setzero_si512()) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sad_epu8 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, (__v64qi) __B); } - - +#undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS #endif diff --git a/lib/clang/7.0.0/include/avx512cdintrin.h b/lib/clang/8.0.0/include/avx512cdintrin.h similarity index 79% rename from lib/clang/7.0.0/include/avx512cdintrin.h rename to lib/clang/8.0.0/include/avx512cdintrin.h index ec7e0cd..e639027 100644 --- a/lib/clang/7.0.0/include/avx512cdintrin.h +++ b/lib/clang/8.0.0/include/avx512cdintrin.h @@ -29,7 +29,7 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_conflict_epi64 (__m512i __A) @@ -82,49 +82,45 @@ _mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32 (__m512i __A) { - return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, - (__v16si) _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i) __builtin_ia32_vplzcntd_512 ((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_lzcnt_epi32(__A), + (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A) { - return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, - (__v16si) _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_lzcnt_epi32(__A), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64 (__m512i __A) { - return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, - (__v8di) _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i) __builtin_ia32_vplzcntq_512 ((__v8di) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { - return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_lzcnt_epi64(__A), + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A) { - return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, - (__v8di) _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_lzcnt_epi64(__A), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git 
a/lib/clang/7.0.0/include/avx512dqintrin.h b/lib/clang/8.0.0/include/avx512dqintrin.h similarity index 65% rename from lib/clang/7.0.0/include/avx512dqintrin.h rename to lib/clang/8.0.0/include/avx512dqintrin.h index 2c431d9..6e6c293 100644 --- a/lib/clang/7.0.0/include/avx512dqintrin.h +++ b/lib/clang/8.0.0/include/avx512dqintrin.h @@ -29,180 +29,309 @@ #define __AVX512DQINTRIN_H /* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline __mmask8 __DEFAULT_FN_ATTRS +_knot_mask8(__mmask8 __M) +{ + return __builtin_ia32_knotqi(__M); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kand_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kandn_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kor_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kxnor_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kxor_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) +{ + return (unsigned char)__builtin_ia32_kortestcqi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) +{ + return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B); + return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B) +{ + return (unsigned char)__builtin_ia32_ktestcqi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B) +{ + return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B); + return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B) +{ + return (unsigned char)__builtin_ia32_ktestchi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B) +{ + return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B); + return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kadd_mask8(__mmask8 __A, __mmask8 __B) +{ + return 
(__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_kadd_mask16(__mmask16 __A, __mmask16 __B) +{ + return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B); +} + +#define _kshiftli_mask8(A, I) \ + (__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)) + +#define _kshiftri_mask8(A, I) \ + (__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)) + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_cvtmask8_u32(__mmask8 __A) { + return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_cvtu32_mask8(unsigned int __A) { + return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_load_mask8(__mmask8 *__A) { + return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_store_mask8(__mmask8 *__A, __mmask8 __B) { + *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullo_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullo_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_xor_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A ^ (__v8du)__B); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_xor_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_xor_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_xor_ps (__m512 __A, __m512 __B) { return (__m512)((__v16su)__A ^ (__v16su)__B); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_xor_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_xor_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_or_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A | (__v8du)__B); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_or_pd(__m512d __W, 
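The `_k*_mask8` family added above is new AVX512DQ surface for 8-bit mask registers. Since an `__mmask8` is just eight bits, a scalar model of the semantics is ordinary `uint8_t` arithmetic — a sketch, not the header's builtin-backed implementation:

```c
#include <stdint.h>
#include <stdio.h>

static uint8_t knot8(uint8_t m)             { return (uint8_t)~m; }
static uint8_t kand8(uint8_t a, uint8_t b)  { return (uint8_t)(a & b); }
static uint8_t kandn8(uint8_t a, uint8_t b) { return (uint8_t)(~a & b); } /* NOT of first operand, AND second */
static uint8_t kadd8(uint8_t a, uint8_t b)  { return (uint8_t)(a + b); }  /* wraps modulo 256, like KADDB */
/* _kortestc_mask8_u8: 1 iff a|b is all ones; _kortestz_mask8_u8: 1 iff a|b is zero */
static int kortestc8(uint8_t a, uint8_t b)  { return (uint8_t)(a | b) == 0xFF; }
static int kortestz8(uint8_t a, uint8_t b)  { return (uint8_t)(a | b) == 0x00; }

int main(void) {
    uint8_t a = 0xF0, b = 0x0F;
    printf("knot=%02X kand=%02X kandn=%02X kadd=%02X\n",
           (unsigned)knot8(a), (unsigned)kand8(a, b),
           (unsigned)kandn8(a, b), (unsigned)kadd8(a, b));
    printf("kortestc=%d kortestz=%d\n", kortestc8(a, b), kortestz8(a, b));
    return 0;
}
```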
__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_or_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_or_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_or_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A | (__v16su)__B); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_or_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_or_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_and_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A & (__v8du)__B); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_and_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_and_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_and_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A & (__v16su)__B); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_and_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_and_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_andnot_pd(__m512d __A, __m512d __B) { return (__m512d)(~(__v8du)__A & (__v8du)__B); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_andnot_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_andnot_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_andnot_ps(__m512 __A, __m512 __B) { return 
(__m512)(~(__v16su)__A & (__v16su)__B); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_andnot_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_andnot_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi64 (__m512d __A) { return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), @@ -210,7 +339,7 @@ _mm512_cvtpd_epi64 (__m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, (__v8di) __W, @@ -218,7 +347,7 @@ _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), @@ -226,22 +355,22 @@ _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_epi64(A, R) \ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu64 (__m512d __A) { return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), @@ -249,7 +378,7 @@ _mm512_cvtpd_epu64 (__m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, (__v8di) __W, @@ -257,7 +386,7 @@ _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), @@ -265,22 +394,22 @@ _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_epu64(A, R) \ 
(__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \ (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \ (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi64 (__m256 __A) { return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), @@ -288,7 +417,7 @@ _mm512_cvtps_epi64 (__m256 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, (__v8di) __W, @@ -296,7 +425,7 @@ _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), @@ -304,22 +433,22 @@ _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_epi64(A, R) \ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu64 (__m256 __A) { return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), @@ -327,7 +456,7 @@ _mm512_cvtps_epu64 (__m256 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, (__v8di) __W, @@ -335,7 +464,7 @@ _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), @@ -343,62 +472,57 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -#define 
_mm512_cvt_roundps_epu64(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_epu64(A, R) \ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_pd (__m512i __A) { - return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, - (__v8df) _mm512_setzero_pd(), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_convertvector((__v8di)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { - return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_cvtepi64_pd(__A), + (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { - return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, - (__v8df) _mm512_setzero_pd(), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_cvtepi64_pd(__A), + (__v8df)_mm512_setzero_pd()); } -#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepi64_pd(A, R) \ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_ps (__m512i __A) { return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, (__v8sf) _mm256_setzero_ps(), @@ -406,7 +530,7 @@ _mm512_cvtepi64_ps (__m512i __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) { return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, (__v8sf) __W, @@ -414,7 +538,7 @@ _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_ps (__mmask8 __U, 
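Another recurring rewrite: conversions that use the current rounding mode, such as `_mm512_cvtepi64_pd`, now lower to the generic `__builtin_convertvector` (a plain per-lane conversion) with masking layered on via select, instead of a dedicated masked builtin; only the explicit-rounding `_mm512_cvt_round*` macros keep the masked builtin. A scalar sketch of the lane-wise behaviour, including the rounding that occurs once values exceed a double's 53-bit mantissa:

```c
#include <stdint.h>
#include <stdio.h>

static void cvtepi64_pd(const int64_t *a, double *out) {
    for (int i = 0; i < 8; i++)   /* a __m512i = 8 epi64 lanes */
        out[i] = (double)a[i];    /* may round: a double holds only 53 mantissa bits */
}

int main(void) {
    int64_t a[8] = {0, -1, 1LL << 52, (1LL << 53) + 1, 42, -42, INT64_MAX, INT64_MIN};
    double d[8];
    cvtepi64_pd(a, d);
    for (int i = 0; i < 8; i++)
        printf("%lld -> %.1f\n", (long long)a[i], d[i]);  /* (2^53)+1 rounds to 2^53 */
    return 0;
}
```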
__m512i __A) { return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, (__v8sf) _mm256_setzero_ps(), @@ -422,23 +546,23 @@ _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepi64_ps(A, R) \ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi64 (__m512d __A) { return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), @@ -446,7 +570,7 @@ _mm512_cvttpd_epi64 (__m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, (__v8di) __W, @@ -454,7 +578,7 @@ _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), @@ -462,22 +586,22 @@ _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundpd_epi64(A, R) \ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu64 (__m512d __A) { return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), @@ -485,7 +609,7 @@ _mm512_cvttpd_epu64 (__m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, (__v8di) __W, @@ -493,7 +617,7 @@ _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS 
+static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), @@ -501,22 +625,22 @@ _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundpd_epu64(A, R) \ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi64 (__m256 __A) { return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), @@ -524,7 +648,7 @@ _mm512_cvttps_epi64 (__m256 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, (__v8di) __W, @@ -532,7 +656,7 @@ _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), @@ -540,22 +664,22 @@ _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundps_epi64(A, R) \ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu64 (__m256 __A) { return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), @@ -563,7 +687,7 @@ _mm512_cvttps_epu64 (__m256 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, (__v8di) __W, @@ -571,7 +695,7 @@ 
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), @@ -579,63 +703,58 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundps_epu64(A, R) \ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu64_pd (__m512i __A) { - return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, - (__v8df) _mm512_setzero_pd(), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_convertvector((__v8du)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { - return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_cvtepu64_pd(__A), + (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { - return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, - (__v8df) _mm512_setzero_pd(), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_cvtepu64_pd(__A), + (__v8df)_mm512_setzero_pd()); } -#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepu64_pd(A, R) \ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtepu64_ps (__m512i __A) { return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, (__v8sf) _mm256_setzero_ps(), @@ -643,7 +762,7 @@ _mm512_cvtepu64_ps (__m512i __A) { _MM_FROUND_CUR_DIRECTION); } -static 
__inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) { return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, (__v8sf) __W, @@ -651,7 +770,7 @@ _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, (__v8sf) _mm256_setzero_ps(), @@ -659,327 +778,326 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepu64_ps(A, R) \ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_range_pd(A, B, C) __extension__ ({ \ +#define _mm512_range_pd(A, B, C) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm512_mask_range_pd(W, U, A, B, C) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \ +#define _mm512_maskz_range_pd(U, A, B, C) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_range_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \ +#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_range_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_range_ps(A, B, C) __extension__ ({ \ +#define _mm512_range_ps(A, B, C) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) 
+ _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm512_mask_range_ps(W, U, A, B, C) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \ +#define _mm512_maskz_range_ps(U, A, B, C) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_range_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \ +#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_range_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \ +#define _mm_range_round_ss(A, B, C, R) \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8) -1, (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ +#define _mm_mask_range_round_ss(W, U, A, B, C, R) \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W),\ (__mmask8)(U), (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ +#define _mm_maskz_range_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ +#define _mm_range_round_sd(A, B, C, R) \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8) -1, (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \ +#define _mm_mask_range_round_sd(W, U, A, B, C, R) \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W),\ (__mmask8)(U), (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \ +#define 
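The other mechanical change running through these macros is the removal of the GNU statement-expression wrapper `__extension__ ({ … })`: each macro now expands to a plain cast expression, which is valid in more contexts and avoids `-Wgnu-statement-expression` noise. A toy illustration (both macros invented for the example, not from the header):

```c
#define OLD_STYLE(x) __extension__ ({ int __t = (x); __t * 2; })  /* GNU statement expression */
#define NEW_STYLE(x) ((x) * 2)                                    /* plain expression */

int file_scope_ok = NEW_STYLE(21);      /* fine: an ordinary constant expression      */
/* int file_scope_bad = OLD_STYLE(21);     would not compile: statement expressions
                                           are only allowed inside a function body    */

#include <stdio.h>
int main(void) {
    printf("%d %d\n", OLD_STYLE(21), file_scope_ok);  /* both print 42 */
    return 0;
}
```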
_mm_maskz_range_round_sd(U, A, B, C, R) \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm512_reduce_pd(A, B) __extension__ ({ \ +#define _mm512_reduce_pd(A, B) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \ +#define _mm512_mask_reduce_pd(W, U, A, B) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \ +#define _mm512_maskz_reduce_pd(U, A, B) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_reduce_ps(A, B) __extension__ ({ \ +#define _mm512_reduce_ps(A, B) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \ +#define _mm512_mask_reduce_ps(W, U, A, B) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \ +#define _mm512_maskz_reduce_ps(U, A, B) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_reduce_round_pd(A, B, R) __extension__ ({\ +#define _mm512_reduce_round_pd(A, B, R) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\ +#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\ +#define _mm512_maskz_reduce_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_reduce_round_ps(A, B, R) __extension__ ({\ +#define _mm512_reduce_round_ps(A, B, R) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\ +#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\ +#define _mm512_maskz_reduce_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), 
(int)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm_reduce_ss(A, B, C) __extension__ ({ \ +#define _mm_reduce_ss(A, B, C) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ - (int)(C), _MM_FROUND_CUR_DIRECTION); }) + (int)(C), _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_reduce_ss(W, U, A, B, C) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(C), _MM_FROUND_CUR_DIRECTION); }) + (int)(C), _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_reduce_ss(U, A, B, C) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \ +#define _mm_reduce_round_ss(A, B, C, R) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ - (int)(C), (int)(R)); }) + (int)(C), (int)(R)) -#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \ +#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(C), (int)(R)); }) + (int)(C), (int)(R)) -#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \ +#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(C), (int)(R)); }) + (__mmask8)(U), (int)(C), (int)(R)) -#define _mm_reduce_sd(A, B, C) __extension__ ({ \ +#define _mm_reduce_sd(A, B, C) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(C), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_reduce_sd(W, U, A, B, C) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), (__mmask8)(U), \ - (int)(C), _MM_FROUND_CUR_DIRECTION); }) + (int)(C), _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_reduce_sd(U, A, B, C) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \ +#define _mm_reduce_round_sd(A, B, C, R) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(C), (int)(R)); }) + (__mmask8)-1, (int)(C), (int)(R)) -#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \ +#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), (__mmask8)(U), \ - (int)(C), (int)(R)); }) + (int)(C), (int)(R)) -#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \ +#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ 
(__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(C), (int)(R)); }) - -static __inline__ __mmask16 __DEFAULT_FN_ATTRS + (__mmask8)(U), (int)(C), (int)(R)) + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_movepi32_mask (__m512i __A) { return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi32 (__mmask16 __A) { return (__m512i) __builtin_ia32_cvtmask2d512 (__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi64 (__mmask8 __A) { return (__m512i) __builtin_ia32_cvtmask2q512 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_movepi64_mask (__m512i __A) { return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x2 (__m128 __A) { - return (__m512)__builtin_shufflevector((__v4sf)__A, - (__v4sf)_mm_undefined_ps(), + return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, @@ -987,7 +1105,7 @@ _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, @@ -995,7 +1113,7 @@ _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x8(__m256 __A) { return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A, @@ -1003,30 +1121,30 @@ _mm512_broadcast_f32x8(__m256 __A) 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { - return (__m512)__builtin_ia32_selectps_512((__mmask8)__M, + return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { - return (__m512)__builtin_ia32_selectps_512((__mmask8)__M, + return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x2(__m128d __A) { return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, @@ -1034,7 +1152,7 @@ _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) (__v8df)__O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d 
__DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, @@ -1042,16 +1160,15 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x2 (__m128i __A) { - return (__m512i)__builtin_shufflevector((__v4si)__A, - (__v4si)_mm_undefined_si128(), + return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1059,7 +1176,7 @@ _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1067,7 +1184,7 @@ _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x8(__m256i __A) { return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A, @@ -1075,30 +1192,30 @@ _mm512_broadcast_i32x8(__m256i __A) 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { - return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M, + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { - return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M, + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x2(__m128i __A) { return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1106,7 +1223,7 @@ _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1114,218 +1231,161 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v8di)_mm512_setzero_si512()); } -#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \ - (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_undefined_ps(), \ - ((imm) & 1) ? 8 : 0, \ - ((imm) & 1) ? 9 : 1, \ - ((imm) & 1) ? 10 : 2, \ - ((imm) & 1) ? 11 : 3, \ - ((imm) & 1) ? 12 : 4, \ - ((imm) & 1) ? 13 : 5, \ - ((imm) & 1) ? 14 : 6, \ - ((imm) & 1) ? 
15 : 7); }) - -#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ - (__v8sf)(W)); }) - -#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ - (__v8sf)_mm256_setzero_ps()); }) - -#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \ - (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)_mm512_undefined_pd(), \ - 0 + ((imm) & 0x3) * 2, \ - 1 + ((imm) & 0x3) * 2); }) - -#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ - (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ - (__v2df)(W)); }) - -#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ - (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ - (__v2df)_mm_setzero_pd()); }) - -#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_undefined_epi32(), \ - ((imm) & 1) ? 8 : 0, \ - ((imm) & 1) ? 9 : 1, \ - ((imm) & 1) ? 10 : 2, \ - ((imm) & 1) ? 11 : 3, \ - ((imm) & 1) ? 12 : 4, \ - ((imm) & 1) ? 13 : 5, \ - ((imm) & 1) ? 14 : 6, \ - ((imm) & 1) ? 15 : 7); }) - -#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ - (__v8si)(W)); }) - -#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ - (__v8si)_mm256_setzero_si256()); }) - -#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)_mm512_undefined_epi32(), \ - 0 + ((imm) & 0x3) * 2, \ - 1 + ((imm) & 0x3) * 2); }) - -#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ - (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ - (__v2di)(W)); }) - -#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ - (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ - (__v2di)_mm_setzero_di()); }) - -#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \ - (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_castps256_ps512((__m256)(B)),\ - ((imm) & 0x1) ? 0 : 16, \ - ((imm) & 0x1) ? 1 : 17, \ - ((imm) & 0x1) ? 2 : 18, \ - ((imm) & 0x1) ? 3 : 19, \ - ((imm) & 0x1) ? 4 : 20, \ - ((imm) & 0x1) ? 5 : 21, \ - ((imm) & 0x1) ? 6 : 22, \ - ((imm) & 0x1) ? 7 : 23, \ - ((imm) & 0x1) ? 16 : 8, \ - ((imm) & 0x1) ? 17 : 9, \ - ((imm) & 0x1) ? 18 : 10, \ - ((imm) & 0x1) ? 19 : 11, \ - ((imm) & 0x1) ? 20 : 12, \ - ((imm) & 0x1) ? 21 : 13, \ - ((imm) & 0x1) ? 22 : 14, \ - ((imm) & 0x1) ? 
23 : 15); }) - -#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_extractf32x8_ps(A, imm) \ + (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v8sf)_mm256_undefined_ps(), \ + (__mmask8)-1) + +#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ + (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v8sf)(__m256)(W), \ + (__mmask8)(U)) + +#define _mm512_maskz_extractf32x8_ps(U, A, imm) \ + (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U)) + +#define _mm512_extractf64x2_pd(A, imm) \ + (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ + (int)(imm), \ + (__v2df)_mm_undefined_pd(), \ + (__mmask8)-1) + +#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ + (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ + (int)(imm), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U)) + +#define _mm512_maskz_extractf64x2_pd(U, A, imm) \ + (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ + (int)(imm), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U)) + +#define _mm512_extracti32x8_epi32(A, imm) \ + (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v8si)_mm256_undefined_si256(), \ + (__mmask8)-1) + +#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ + (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v8si)(__m256i)(W), \ + (__mmask8)(U)) + +#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \ + (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)(U)) + +#define _mm512_extracti64x2_epi64(A, imm) \ + (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ + (int)(imm), \ + (__v2di)_mm_undefined_si128(), \ + (__mmask8)-1) + +#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ + (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ + (int)(imm), \ + (__v2di)(__m128i)(W), \ + (__mmask8)(U)) + +#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \ + (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ + (int)(imm), \ + (__v2di)_mm_setzero_si128(), \ + (__mmask8)(U)) + +#define _mm512_insertf32x8(A, B, imm) \ + (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \ + (__v8sf)(__m256)(B), (int)(imm)) + +#define _mm512_mask_insertf32x8(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ - (__v16sf)(W)); }) + (__v16sf)(__m512)(W)) -#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_insertf32x8(U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()); }) - -#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\ - (((imm) & 0x3) == 0) ? 8 : 0, \ - (((imm) & 0x3) == 0) ? 9 : 1, \ - (((imm) & 0x3) == 1) ? 8 : 2, \ - (((imm) & 0x3) == 1) ? 9 : 3, \ - (((imm) & 0x3) == 2) ? 8 : 4, \ - (((imm) & 0x3) == 2) ? 9 : 5, \ - (((imm) & 0x3) == 3) ? 8 : 6, \ - (((imm) & 0x3) == 3) ? 
9 : 7); }) - -#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ + (__v16sf)_mm512_setzero_ps()) + +#define _mm512_insertf64x2(A, B, imm) \ + (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \ + (__v2df)(__m128d)(B), (int)(imm)) + +#define _mm512_mask_insertf64x2(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ - (__v8df)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_insertf64x2(U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ - (__v8df)_mm512_setzero_pd()); }) - -#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_castsi256_si512((__m256i)(B)),\ - ((imm) & 0x1) ? 0 : 16, \ - ((imm) & 0x1) ? 1 : 17, \ - ((imm) & 0x1) ? 2 : 18, \ - ((imm) & 0x1) ? 3 : 19, \ - ((imm) & 0x1) ? 4 : 20, \ - ((imm) & 0x1) ? 5 : 21, \ - ((imm) & 0x1) ? 6 : 22, \ - ((imm) & 0x1) ? 7 : 23, \ - ((imm) & 0x1) ? 16 : 8, \ - ((imm) & 0x1) ? 17 : 9, \ - ((imm) & 0x1) ? 18 : 10, \ - ((imm) & 0x1) ? 19 : 11, \ - ((imm) & 0x1) ? 20 : 12, \ - ((imm) & 0x1) ? 21 : 13, \ - ((imm) & 0x1) ? 22 : 14, \ - ((imm) & 0x1) ? 23 : 15); }) - -#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \ + (__v8df)_mm512_setzero_pd()) + +#define _mm512_inserti32x8(A, B, imm) \ + (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \ + (__v8si)(__m256i)(B), (int)(imm)) + +#define _mm512_mask_inserti32x8(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ - (__v16si)(W)); }) + (__v16si)(__m512i)(W)) -#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_inserti32x8(U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()); }) - -#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)_mm512_castsi128_si512((__m128i)(B)),\ - (((imm) & 0x3) == 0) ? 8 : 0, \ - (((imm) & 0x3) == 0) ? 9 : 1, \ - (((imm) & 0x3) == 1) ? 8 : 2, \ - (((imm) & 0x3) == 1) ? 9 : 3, \ - (((imm) & 0x3) == 2) ? 8 : 4, \ - (((imm) & 0x3) == 2) ? 9 : 5, \ - (((imm) & 0x3) == 3) ? 8 : 6, \ - (((imm) & 0x3) == 3) ? 
9 : 7); }) - -#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ + (__v16si)_mm512_setzero_si512()) + +#define _mm512_inserti64x2(A, B, imm) \ + (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \ + (__v2di)(__m128i)(B), (int)(imm)) + +#define _mm512_mask_inserti64x2(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ - (__v8di)(W)); }) + (__v8di)(__m512i)(W)) -#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_inserti64x2(U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()); }) + (__v8di)_mm512_setzero_si512()) -#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ +#define _mm512_mask_fpclass_ps_mask(U, A, imm) \ (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ - (int)(imm), (__mmask16)(U)); }) + (int)(imm), (__mmask16)(U)) -#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \ +#define _mm512_fpclass_ps_mask(A, imm) \ (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ - (int)(imm), (__mmask16)-1); }) + (int)(imm), (__mmask16)-1) -#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ +#define _mm512_mask_fpclass_pd_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \ +#define _mm512_fpclass_pd_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \ +#define _mm_fpclass_sd_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \ +#define _mm_mask_fpclass_sd_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \ +#define _mm_fpclass_ss_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \ +#define _mm_mask_fpclass_ss_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) +#undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS #endif diff --git a/lib/clang/7.0.0/include/avx512erintrin.h b/lib/clang/8.0.0/include/avx512erintrin.h similarity index 78% rename from lib/clang/7.0.0/include/avx512erintrin.h rename to lib/clang/8.0.0/include/avx512erintrin.h index 8ff212c..6348275 100644 --- a/lib/clang/7.0.0/include/avx512erintrin.h +++ b/lib/clang/8.0.0/include/avx512erintrin.h @@ -27,21 +27,21 @@ #ifndef __AVX512ERINTRIN_H #define __AVX512ERINTRIN_H -// exp2a23 -#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \ +/* exp2a23 */ +#define _mm512_exp2a23_round_pd(A, R) \ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)); }) + (int)(R)) -#define 
_mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \ +#define _mm512_maskz_exp2a23_round_pd(M, A, R) \ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm512_exp2a23_pd(A) \ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -52,20 +52,20 @@ #define _mm512_maskz_exp2a23_pd(M, A) \ _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \ +#define _mm512_exp2a23_round_ps(A, R) \ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \ +#define _mm512_maskz_exp2a23_round_ps(M, A, R) \ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)); }) + (__mmask16)(M), (int)(R)) #define _mm512_exp2a23_ps(A) \ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -76,21 +76,21 @@ #define _mm512_maskz_exp2a23_ps(M, A) \ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) -// rsqrt28 -#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \ +/* rsqrt28 */ +#define _mm512_rsqrt28_round_pd(A, R) \ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \ +#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm512_rsqrt28_pd(A) \ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -101,20 +101,20 @@ #define _mm512_maskz_rsqrt28_pd(M, A) \ _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \ +#define _mm512_rsqrt28_round_ps(A, R) \ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \ +#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)); }) + (__mmask16)(M), (int)(R)) #define _mm512_rsqrt28_ps(A) \ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -125,23 +125,23 @@ #define _mm512_maskz_rsqrt28_ps(M, A) \ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \ +#define _mm_rsqrt28_round_ss(A, B, R) \ 
(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \ +#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \ +#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm_rsqrt28_ss(A, B) \ _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -152,23 +152,23 @@ #define _mm_maskz_rsqrt28_ss(M, A, B) \ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) -#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \ +#define _mm_rsqrt28_round_sd(A, B, R) \ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \ +#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \ +#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm_rsqrt28_sd(A, B) \ _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -179,21 +179,21 @@ #define _mm_maskz_rsqrt28_sd(M, A, B) \ _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) -// rcp28 -#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \ +/* rcp28 */ +#define _mm512_rcp28_round_pd(A, R) \ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_rcp28_round_pd(S, M, A, R) \ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \ +#define _mm512_maskz_rcp28_round_pd(M, A, R) \ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm512_rcp28_pd(A) \ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -204,20 +204,20 @@ #define _mm512_maskz_rcp28_pd(M, A) \ _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \ +#define _mm512_rcp28_round_ps(A, R) \ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_rcp28_round_ps(S, M, A, R) \ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R)); }) + (int)(R)) -#define 
_mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \ +#define _mm512_maskz_rcp28_round_ps(M, A, R) \ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)); }) + (__mmask16)(M), (int)(R)) #define _mm512_rcp28_ps(A) \ _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -228,23 +228,23 @@ #define _mm512_maskz_rcp28_ps(M, A) \ _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \ +#define _mm_rcp28_round_ss(A, B, R) \ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \ +#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \ +#define _mm_maskz_rcp28_round_ss(M, A, B, R) \ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm_rcp28_ss(A, B) \ _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -255,23 +255,23 @@ #define _mm_maskz_rcp28_ss(M, A, B) \ _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) -#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \ +#define _mm_rcp28_round_sd(A, B, R) \ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \ +#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \ +#define _mm_maskz_rcp28_round_sd(M, A, B, R) \ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm_rcp28_sd(A, B) \ _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -282,4 +282,4 @@ #define _mm_maskz_rcp28_sd(M, A, B) \ _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) -#endif // __AVX512ERINTRIN_H +#endif /* __AVX512ERINTRIN_H */ diff --git a/lib/clang/7.0.0/include/avx512fintrin.h b/lib/clang/8.0.0/include/avx512fintrin.h similarity index 60% rename from lib/clang/7.0.0/include/avx512fintrin.h rename to lib/clang/8.0.0/include/avx512fintrin.h index f513742..d00e553 100644 --- a/lib/clang/7.0.0/include/avx512fintrin.h +++ b/lib/clang/8.0.0/include/avx512fintrin.h @@ -173,51 +173,52 @@ typedef enum } _MM_MANTISSA_SIGN_ENUM; /* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) /* Create vectors with repeated elements */ -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void) { - return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; + return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } #define _mm512_setzero_epi32 _mm512_setzero_si512 -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void) { return (__m512d)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void) { return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) { return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void) { return (__m512i)__builtin_ia32_undef512(); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32 (__m128i __A) { - return (__m512i)__builtin_shufflevector((__v4si) __A, - (__v4si)_mm_undefined_si128(), + return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, @@ -225,7 +226,7 @@ _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, @@ -233,15 +234,14 @@ _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) (__v16si) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64 (__m128i __A) { - return (__m512i)__builtin_shufflevector((__v2di) __A, - (__v2di) _mm_undefined_si128(), + return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, @@ -250,7 +250,7 @@ _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, @@ -259,122 +259,122 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void) { - return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } #define _mm512_setzero _mm512_setzero_ps -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void) { - return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w) { - return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w }; + return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w) { - return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; + return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w) { - return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w }; + return __extension__ (__m512i)(__v64qi){ + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w) { - return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w }; + return __extension__ (__m512i)(__v32hi){ + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s) { - return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, - __s, __s, __s, __s, __s, __s, __s, __s }; + return __extension__ (__m512i)(__v16si){ + __s, __s, __s, __s, __s, __s, __s, __s, + __s, __s, __s, __s, __s, __s, __s, __s }; } -static __inline __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_set1_epi32(__mmask16 __M, int __A) +static __inline __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_set1_epi32(__mmask16 __M, int __A) { - return (__m512i)__builtin_ia32_selectd_512(__M, + return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si)_mm512_set1_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d) { - return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; + return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } -#ifdef __x86_64__ -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) { return 
(__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_set1_epi64(__A), (__v8di)_mm512_setzero_si512()); } -#endif -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A) { - return (__m512)__builtin_shufflevector((__v4sf) __A, - (__v4sf)_mm_undefined_ps(), + return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32 (int __A, int __B, int __C, int __D) { - return (__m512i)(__v16si) + return __extension__ (__m512i)(__v16si) { __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D) { - return (__m512i) (__v8di) + return __extension__ (__m512i) (__v8di) { __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd (double __A, double __B, double __C, double __D) { - return (__m512d) + return __extension__ (__m512d) { __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps (float __A, float __B, float __C, float __D) { - return (__m512) + return __extension__ (__m512) { __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A }; } @@ -391,120 +391,119 @@ _mm512_set4_ps (float __A, float __B, float __C, float __D) #define _mm512_setr4_ps(e0,e1,e2,e3) \ _mm512_set4_ps((e3),(e2),(e1),(e0)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A) { - return (__m512d)__builtin_shufflevector((__v2df) __A, - (__v2df) _mm_undefined_pd(), + return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A, 0, 0, 0, 0, 0, 0, 0, 0); } /* Cast between vector types */ -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1); } -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a) { return __builtin_shufflevector(__a, __a, 0, 1); } -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256 (__m512d __A) { return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); } -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256 (__m512 __A) { return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps (__m512d __A) { return (__m512) (__A); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512 (__m512d __A) { return (__m512i) 
(__A); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512 (__m128d __A) { return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd (__m512 __A) { return (__m512d) (__A); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512 (__m512 __A) { return (__m512i) (__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512 (__m128 __A) { return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512 (__m128i __A) { return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512 (__m256i __A) { return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps (__m512i __A) { return (__m512) (__A); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd (__m512i __A) { return (__m512d) (__A); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128 (__m512i __A) { return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); } -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256 (__m512i __A) { return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); @@ -522,7 +521,7 @@ _mm512_mask2int(__mmask16 __a) return (int)__a; } -/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a +/// Constructs a 512-bit floating-point vector of [8 x double] from a /// 128-bit floating-point vector of [2 x double]. The lower 128 bits /// contain the value of the source vector. The upper 384 bits are set /// to zero. @@ -535,13 +534,13 @@ _mm512_mask2int(__mmask16 __a) /// A 128-bit vector of [2 x double]. /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); } -/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a +/// Constructs a 512-bit floating-point vector of [8 x double] from a /// 256-bit floating-point vector of [4 x double]. The lower 256 bits /// contain the value of the source vector. The upper 256 bits are set /// to zero. @@ -554,13 +553,13 @@ _mm512_zextpd128_pd512(__m128d __a) /// A 256-bit vector of [4 x double]. /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. 
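
The doc comments above spell out how the `zext` casts differ from the plain `cast` intrinsics used elsewhere in this file: `_mm512_castpd256_pd512` and friends leave the upper bits of the destination undefined, while the `zext` variants guarantee they are zero. A minimal usage sketch of that contrast — the `widen_*` function names are illustrative, not part of the header, and this assumes clang with `-mavx512f`:

```c
#include <immintrin.h>

/* Upper 256 bits of the result are undefined: cheap, but only safe when
   the caller never reads them. */
__m512d widen_undef(__m256d v) {
  return _mm512_castpd256_pd512(v);
}

/* Upper 256 bits are guaranteed zero, as the doc comments above state. */
__m512d widen_zero(__m256d v) {
  return _mm512_zextpd256_pd512(v);
}
```

Both typically compile to little more than a register move; the `zext` form may cost an extra zeroing move when the source is not already in a zero-extended register.
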
-static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); } -/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a +/// Constructs a 512-bit floating-point vector of [16 x float] from a /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain /// the value of the source vector. The upper 384 bits are set to zero. /// @@ -572,13 +571,13 @@ _mm512_zextpd256_pd512(__m256d __a) /// A 128-bit vector of [4 x float]. /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); } -/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a +/// Constructs a 512-bit floating-point vector of [16 x float] from a /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain /// the value of the source vector. The upper 256 bits are set to zero. /// @@ -590,13 +589,13 @@ _mm512_zextps128_ps512(__m128 __a) /// A 256-bit vector of [8 x float]. /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -/// \brief Constructs a 512-bit integer vector from a 128-bit integer vector. +/// Constructs a 512-bit integer vector from a 128-bit integer vector. /// The lower 128 bits contain the value of the source vector. The upper /// 384 bits are set to zero. /// @@ -608,13 +607,13 @@ _mm512_zextps256_ps512(__m256 __a) /// A 128-bit integer vector. /// \returns A 512-bit integer vector. The lower 128 bits contain the value of /// the parameter. The upper 384 bits are set to zero. -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a) { return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); } -/// \brief Constructs a 512-bit integer vector from a 256-bit integer vector. +/// Constructs a 512-bit integer vector from a 256-bit integer vector. /// The lower 256 bits contain the value of the source vector. The upper /// 256 bits are set to zero. /// @@ -626,20 +625,20 @@ _mm512_zextsi128_si512(__m128i __a) /// A 256-bit integer vector. /// \returns A 512-bit integer vector. The lower 256 bits contain the value of /// the parameter. The upper 256 bits are set to zero. 
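
The recurring `__DEFAULT_FN_ATTRS` → `__DEFAULT_FN_ATTRS512` substitutions throughout this hunk come from the new attribute macros defined at the top of the file, which add `__min_vector_width__(512)`. As I understand it, that attribute tells clang the function intrinsically requires 512-bit vectors, so its body is not legalized down to narrower operations when the translation unit is built with a smaller preferred vector width. A sketch of the pattern under those assumptions — `DEMO_ATTRS512` and `demo_add_epi32` are illustrative stand-ins, not names from the header:

```c
#include <immintrin.h>

/* DEMO_ATTRS512 mirrors the shape of the __DEFAULT_FN_ATTRS512 macro
   introduced by this hunk. */
#define DEMO_ATTRS512                                            \
  __attribute__((__always_inline__, __nodebug__,                 \
                 __target__("avx512f"), __min_vector_width__(512)))

/* With __min_vector_width__(512), the 512-bit add below should remain a
   single zmm operation even if the file is built with
   -mprefer-vector-width=256. */
static __inline__ __m512i DEMO_ATTRS512
demo_add_epi32(__m512i a, __m512i b) {
  return _mm512_add_epi32(a, b);
}
```

This is also why the `__DEFAULT_FN_ATTRS128` variant appears for the scalar `ss`/`sd` helpers further down: they only need 128-bit width, and tagging them as such keeps them callable from narrow-vector code without penalty.
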
-static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); } /* Bitwise operators */ -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a & (__v16su)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, @@ -647,20 +646,20 @@ _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) (__v16si) __src); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, @@ -668,26 +667,26 @@ _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) (__v8di) __src); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512 (__m512i __A, __m512i __B) { - return (__m512i)(~(__v8du)(__A) & (__v8du)__B); + return (__m512i)(~(__v8du)__A & (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32 (__m512i __A, __m512i __B) { - return (__m512i)(~(__v16su)(__A) & (__v16su)__B); + return (__m512i)(~(__v16su)__A & (__v16su)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -695,20 +694,20 @@ _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(), __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B) { - return (__m512i)(~(__v8du)(__A) & (__v8du)__B); + return (__m512i)(~(__v8du)__A & (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -716,20 +715,20 @@ _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS 
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
+                                           __U, __A, __B);
+}
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_or_epi32(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v16su)__a | (__v16su)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
@@ -737,19 +736,19 @@ _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
                                              (__v16si)__src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_or_epi64(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a | (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
@@ -757,19 +756,19 @@ _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
                                              (__v8di)__src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_xor_epi32(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v16su)__a ^ (__v16su)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
@@ -777,19 +776,19 @@ _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
                                              (__v16si)__src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_xor_epi64(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a ^ (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
@@ -797,25 +796,25 @@ _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
                                              (__v8di)__src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_and_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a & (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_or_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a | (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_xor_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a ^ (__v8du)__b);
@@ -823,49 +822,49 @@ _mm512_xor_si512(__m512i __a, __m512i __b)
 
 /* Arithmetic */
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_add_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)((__v8df)__a + (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_add_ps(__m512 __a, __m512 __b)
 {
   return (__m512)((__v16sf)__a + (__v16sf)__b);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_mul_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)((__v8df)__a * (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_mul_ps(__m512 __a, __m512 __b)
 {
   return (__m512)((__v16sf)__a * (__v16sf)__b);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_sub_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)((__v8df)__a - (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_sub_ps(__m512 __a, __m512 __b)
 {
   return (__m512)((__v16sf)__a - (__v16sf)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_add_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v8du) __A + (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -873,7 +872,7 @@ _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -881,13 +880,13 @@ _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sub_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v8du) __A - (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -895,7 +894,7 @@ _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -903,13 +902,13 @@ _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_add_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A + (__v16su) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -917,7 +916,7 @@ _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -925,13 +924,13 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sub_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A - (__v16su) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -939,7 +938,7 @@ _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -947,107 +946,81 @@ _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
                                              (__v16si)_mm512_setzero_si512());
 }
 
-#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R)); })
+#define _mm512_max_round_pd(A, B, R) \
+  (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), (int)(R))
 
-#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(U), (int)(R)); })
+#define _mm512_mask_max_round_pd(W, U, A, B, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
+                                       (__v8df)(W))
 
-#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_undefined_pd(), \
-                                        (__mmask8)-1, (int)(R)); })
+#define _mm512_maskz_max_round_pd(U, A, B, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
+                                       (__v8df)_mm512_setzero_pd())
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_max_pd(__m512d __A, __m512d __B)
 {
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-                                                 (__v8df) __B,
-                                                 (__v8df)
-                                                 _mm512_setzero_pd (),
-                                                 (__mmask8) -1,
-                                                 _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
+                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-                                                 (__v8df) __B,
-                                                 (__v8df) __W,
-                                                 (__mmask8) __U,
-                                                 _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(__U,
+                                              (__v8df)_mm512_max_pd(__A, __B),
+                                              (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
 {
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-                                                 (__v8df) __B,
-                                                 (__v8df)
-                                                 _mm512_setzero_pd (),
-                                                 (__mmask8) __U,
-                                                 _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(__U,
+                                              (__v8df)_mm512_max_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }
 
-#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                       (int)(R)); })
+#define _mm512_max_round_ps(A, B, R) \
  (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))
 
-#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(U), (int)(R)); })
+#define _mm512_mask_max_round_ps(W, U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
+                                      (__v16sf)(W))
 
-#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_undefined_ps(), \
-                                       (__mmask16)-1, (int)(R)); })
+#define _mm512_maskz_max_round_ps(U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
+                                      (__v16sf)_mm512_setzero_ps())
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_max_ps(__m512 __A, __m512 __B)
 {
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-                                                (__v16sf) __B,
-                                                (__v16sf)
-                                                _mm512_setzero_ps (),
-                                                (__mmask16) -1,
-                                                _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
+                                          _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-                                                (__v16sf) __B,
-                                                (__v16sf) __W,
-                                                (__mmask16) __U,
-                                                _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_selectps_512(__U,
+                                             (__v16sf)_mm512_max_ps(__A, __B),
+                                             (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-                                                (__v16sf) __B,
-                                                (__v16sf)
-                                                _mm512_setzero_ps (),
-                                                (__mmask16) __U,
-                                                _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_selectps_512(__U,
+                                             (__v16sf)_mm512_max_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -1056,7 +1029,7 @@ _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -1065,25 +1038,25 @@ _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-#define _mm_max_round_ss(A, B, R) __extension__ ({ \
+#define _mm_max_round_ss(A, B, R) \
   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_max_round_ss(W, U, A, B, R) \
   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
-                                          (int)(R)); })
+                                          (int)(R))
 
-#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_max_round_ss(U, A, B, R) \
   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -1092,7 +1065,7 @@ _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -1101,238 +1074,188 @@ _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-#define _mm_max_round_sd(A, B, R) __extension__ ({ \
+#define _mm_max_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_max_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_max_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
 static __inline __m512i
-__DEFAULT_FN_ATTRS
+__DEFAULT_FN_ATTRS512
 _mm512_max_epi32(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si)
-                                                  _mm512_setzero_si512 (),
-                                                  (__mmask16) -1);
+  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si) __W, __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                             (__v16si)_mm512_max_epi32(__A, __B),
+                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si)
-                                                  _mm512_setzero_si512 (),
-                                                  __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                             (__v16si)_mm512_max_epi32(__A, __B),
+                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu32(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si)
-                                                  _mm512_setzero_si512 (),
-                                                  (__mmask16) -1);
+  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si) __W, __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                             (__v16si)_mm512_max_epu32(__A, __B),
+                                             (__v16si)__W);
}
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si)
-                                                  _mm512_setzero_si512 (),
-                                                  __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                             (__v16si)_mm512_max_epu32(__A, __B),
+                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di)
-                                                  _mm512_setzero_si512 (),
-                                                  (__mmask8) -1);
+  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di) __W, __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                             (__v8di)_mm512_max_epi64(__A, __B),
+                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di)
-                                                  _mm512_setzero_si512 (),
-                                                  __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                             (__v8di)_mm512_max_epi64(__A, __B),
+                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu64(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di)
-                                                  _mm512_setzero_si512 (),
-                                                  (__mmask8) -1);
+  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di) __W, __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di)
-                                                  _mm512_setzero_si512 (),
-                                                  __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                             (__v8di)_mm512_max_epu64(__A, __B),
+                                             (__v8di)_mm512_setzero_si512());
 }
 
-#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R)); })
+#define _mm512_min_round_pd(A, B, R) \
+  (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), (int)(R))
 
-#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(U), (int)(R)); })
+#define _mm512_mask_min_round_pd(W, U, A, B, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
+                                       (__v8df)(W))
 
-#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_undefined_pd(), \
-                                        (__mmask8)-1, (int)(R)); })
+#define _mm512_maskz_min_round_pd(U, A, B, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
+                                       (__v8df)_mm512_setzero_pd())
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_min_pd(__m512d __A, __m512d __B)
 {
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-                                                 (__v8df) __B,
-                                                 (__v8df)
-                                                 _mm512_setzero_pd (),
-                                                 (__mmask8) -1,
-                                                 _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
+                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-                                                 (__v8df) __B,
-                                                 (__v8df) __W,
-                                                 (__mmask8) __U,
-                                                 _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)__W);
 }
 
-#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                       (int)(R)); })
-
-#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(U), (int)(R)); })
-
-#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_undefined_ps(), \
-                                       (__mmask16)-1, (int)(R)); })
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
 {
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-                                                 (__v8df) __B,
-                                                 (__v8df)
-                                                 _mm512_setzero_pd (),
-                                                 (__mmask8) __U,
-                                                 _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(__U,
+                                              (__v8df)_mm512_min_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+#define _mm512_min_round_ps(A, B, R) \
+  (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), (int)(R))
+
+#define _mm512_mask_min_round_ps(W, U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
+                                      (__v16sf)(W))
+
+#define _mm512_maskz_min_round_ps(U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
+                                      (__v16sf)_mm512_setzero_ps())
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_min_ps(__m512 __A, __m512 __B)
 {
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-                                                (__v16sf) __B,
-                                                (__v16sf)
-                                                _mm512_setzero_ps (),
-                                                (__mmask16) -1,
-                                                _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
+                                          _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-                                                (__v16sf) __B,
-                                                (__v16sf) __W,
-                                                (__mmask16) __U,
-                                                _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-                                                (__v16sf) __B,
-                                                (__v16sf)
-                                                _mm512_setzero_ps (),
-                                                (__mmask16) __U,
-                                                _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_selectps_512(__U,
+                                             (__v16sf)_mm512_min_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -1341,7 +1264,7 @@ _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -1350,25 +1273,25 @@ _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-#define _mm_min_round_ss(A, B, R) __extension__ ({ \
+#define _mm_min_round_ss(A, B, R) \
   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_min_round_ss(W, U, A, B, R) \
   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
-                                          (int)(R)); })
+                                          (int)(R))
 
-#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_min_round_ss(U, A, B, R) \
   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -1377,7 +1300,7 @@ _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -1386,144 +1309,120 @@ _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-#define _mm_min_round_sd(A, B, R) __extension__ ({ \
+#define _mm_min_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_min_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_min_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
 static __inline __m512i
-__DEFAULT_FN_ATTRS
+__DEFAULT_FN_ATTRS512
 _mm512_min_epi32(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si)
-                                                  _mm512_setzero_si512 (),
-                                                  (__mmask16) -1);
+  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si) __W, __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epi32(__A, __B),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si)
-                                                  _mm512_setzero_si512 (),
-                                                  __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                             (__v16si)_mm512_min_epi32(__A, __B),
+                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu32(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si)
-                                                  _mm512_setzero_si512 (),
-                                                  (__mmask16) -1);
+  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si) __W, __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epu32(__A, __B),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
-                                                  (__v16si) __B,
-                                                  (__v16si)
-                                                  _mm512_setzero_si512 (),
-                                                  __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                             (__v16si)_mm512_min_epu32(__A, __B),
+                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di)
-                                                  _mm512_setzero_si512 (),
-                                                  (__mmask8) -1);
+  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di) __W, __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di)
-                                                  _mm512_setzero_si512 (),
-                                                  __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                             (__v8di)_mm512_min_epi64(__A, __B),
+                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu64(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di)
-                                                  _mm512_setzero_si512 (),
-                                                  (__mmask8) -1);
+  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di) __W, __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
-                                                  (__v8di) __B,
-                                                  (__v8di)
-                                                  _mm512_setzero_si512 (),
-                                                  __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                             (__v8di)_mm512_min_epu64(__A, __B),
+                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mul_epi32(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1531,7 +1430,7 @@ _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
                                              (__v8di)__W);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1539,13 +1438,13 @@ _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mul_epu32(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1553,7 +1452,7 @@ _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
                                              (__v8di)__W);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1561,13 +1460,13 @@ _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A * (__v16su) __B);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1575,7 +1474,7 @@ _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1583,92 +1482,91 @@ _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
                                              (__v16si)__W);
 }
 
-#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
-                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                         (int)(R)); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
+  return (__m512i) ((__v8du) __A * (__v8du) __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
+                                             (__v8di)_mm512_mullox_epi64(__A, __B),
+                                             (__v8di)__W);
+}
+
+#define _mm512_sqrt_round_pd(A, R) \
+  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))
 
-#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
-                                         (__v8df)_mm512_setzero_pd(), \
-                                         (__mmask8)(U), (int)(R)); })
+#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
+                                       (__v8df)(__m512d)(W))
 
-#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
-                                         (__v8df)_mm512_undefined_pd(), \
-                                         (__mmask8)-1, (int)(R)); })
+#define _mm512_maskz_sqrt_round_pd(U, A, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
+                                       (__v8df)_mm512_setzero_pd())
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_sqrt_pd(__m512d __a)
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_sqrt_pd(__m512d __A)
 {
-  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
-                                                (__v8df) _mm512_setzero_pd (),
-                                                (__mmask8) -1,
-                                                _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
+                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
-                                                  (__v8df) __W,
-                                                  (__mmask8) __U,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                               (__v8df)_mm512_sqrt_pd(__A),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
 {
-  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
-                                                  (__v8df)
-                                                  _mm512_setzero_pd (),
-                                                  (__mmask8) __U,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(__U,
+                                              (__v8df)_mm512_sqrt_pd(__A),
+                                              (__v8df)_mm512_setzero_pd());
 }
 
-#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
-  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
-                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                        (int)(R)); })
+#define _mm512_sqrt_round_ps(A, R) \
+  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))
 
-#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
-  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
-                                        (__v16sf)_mm512_setzero_ps(), \
-                                        (__mmask16)(U), (int)(R)); })
+#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
+                                      (__v16sf)(__m512)(W))
 
-#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
-  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
-                                        (__v16sf)_mm512_undefined_ps(), \
-                                        (__mmask16)-1, (int)(R)); })
+#define _mm512_maskz_sqrt_round_ps(U, A, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
+                                      (__v16sf)_mm512_setzero_ps())
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_sqrt_ps(__m512 __a)
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_sqrt_ps(__m512 __A)
 {
-  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
-                                               (__v16sf) _mm512_setzero_ps (),
-                                               (__mmask16) -1,
-                                               _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
                                          _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
 {
-  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
-                                               (__v16sf) __W,
-                                               (__mmask16) __U,
-                                               _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_sqrt_ps(__A),
                                             (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
 {
-  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
-                                               (__v16sf) _mm512_setzero_ps (),
-                                               (__mmask16) __U,
-                                               _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_selectps_512(__U,
+                                             (__v16sf)_mm512_sqrt_ps(__A),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_rsqrt14_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
@@ -1676,7 +1574,7 @@ _mm512_rsqrt14_pd(__m512d __A)
                  _mm512_setzero_pd (),
                  (__mmask8) -1);}
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
@@ -1684,7 +1582,7 @@ _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
                  (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
@@ -1693,7 +1591,7 @@ _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
                  (__mmask8) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_rsqrt14_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
@@ -1702,7 +1600,7 @@ _mm512_rsqrt14_ps(__m512 __A)
                 (__mmask16) -1);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
@@ -1710,7 +1608,7 @@ _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
                 (__mmask16) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
@@ -1719,7 +1617,7 @@ _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
                 (__mmask16) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
@@ -1729,7 +1627,7 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
              (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
@@ -1738,7 +1636,7 @@ _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
          (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
@@ -1747,7 +1645,7 @@ _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
          (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
@@ -1757,7 +1655,7 @@ _mm_rsqrt14_sd(__m128d __A, __m128d __B)
              (__mmask8) -1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
@@ -1766,7 +1664,7 @@ _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
           (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
@@ -1775,7 +1673,7 @@ _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
           (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_rcp14_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
@@ -1784,7 +1682,7 @@ _mm512_rcp14_pd(__m512d __A)
                (__mmask8) -1);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
@@ -1792,7 +1690,7 @@ _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
                (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
@@ -1801,7 +1699,7 @@ _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
                (__mmask8) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_rcp14_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
@@ -1810,7 +1708,7 @@ _mm512_rcp14_ps(__m512 __A)
               (__mmask16) -1);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
@@ -1818,7 +1716,7 @@ _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
               (__mmask16) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
@@ -1827,7 +1725,7 @@ _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
              (__mmask16) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_rcp14_ss(__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
@@ -1837,7 +1735,7 @@ _mm_rcp14_ss(__m128 __A, __m128 __B)
              (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
@@ -1846,7 +1744,7 @@ _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
          (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
@@ -1855,7 +1753,7 @@ _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
          (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_rcp14_sd(__m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
@@ -1865,7 +1763,7 @@ _mm_rcp14_sd(__m128d __A, __m128d __B)
              (__mmask8) -1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
@@ -1874,7 +1772,7 @@ _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
          (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
@@ -1883,7 +1781,7 @@ _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
          (__mmask8) __U);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_floor_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -1892,7 +1790,7 @@ _mm512_floor_ps(__m512 __A)
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -1901,7 +1799,7 @@ _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_floor_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -1910,7 +1808,7 @@ _mm512_floor_pd(__m512d __A)
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -1919,7 +1817,7 @@ _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -1928,7 +1826,7 @@ _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_ceil_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -1937,7 +1835,7 @@ _mm512_ceil_ps(__m512 __A)
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_ceil_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -1946,7 +1844,7 @@ _mm512_ceil_pd(__m512d __A)
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -1955,758 +1853,672 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi64(__m512i __A)
 {
-  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
-                                                 (__v8di)
-                                                 _mm512_setzero_si512 (),
-                                                 (__mmask8) -1);
+  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
 {
-  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
-                                                 (__v8di) __W,
-                                                 (__mmask8) __U);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_abs_epi64(__A),
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
 {
-  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
-                                                 (__v8di)
-                                                 _mm512_setzero_si512 (),
-                                                 (__mmask8) __U);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
+                                             (__v8di)_mm512_abs_epi64(__A),
+                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi32(__m512i __A)
 {
-  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
-                                                 (__v16si)
-                                                 _mm512_setzero_si512 (),
-                                                 (__mmask16) -1);
+  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
 {
-  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
-                                                 (__v16si) __W,
-                                                 (__mmask16) __U);
+  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_abs_epi32(__A),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
 {
-  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
-                                                 (__v16si)
-                                                 _mm512_setzero_si512 (),
-                                                 (__mmask16) __U);
+  return (__m512i)__builtin_ia32_selectd_512(__U,
+                                             (__v16si)_mm512_abs_epi32(__A),
+                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
-                (__v4sf) __B,
-                (__v4sf) __W,
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_add_ss(__A, __B);
+  return __builtin_ia32_selectss_128(__U, __A, __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
-                (__v4sf) __B,
-                (__v4sf) _mm_setzero_ps (),
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_add_ss(__A, __B);
+  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
 }
 
-#define _mm_add_round_ss(A, B, R) __extension__ ({ \
+#define _mm_add_round_ss(A, B, R) \
   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_add_round_ss(W, U, A, B, R) \
   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
-                                          (int)(R)); })
+                                          (int)(R))
 
-#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_add_round_ss(U, A, B, R) \
   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df) __W,
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_add_sd(__A, __B);
+  return __builtin_ia32_selectsd_128(__U, __A, __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df) _mm_setzero_pd (),
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_add_sd(__A, __B);
+  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
 }
 
-#define _mm_add_round_sd(A, B, R) __extension__ ({ \
+#define _mm_add_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_add_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_add_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_add_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_add_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_add_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_add_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)-1, (int)(R)); })
-
-#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R)); })
-
-#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(U), (int)(R)); })
-
-#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)-1, (int)(R)); })
-
-#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                       (int)(R)); })
-
-#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(U), (int)(R)); })
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+#define _mm512_add_round_pd(A, B, R) \
+  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), (int)(R))
+
+#define _mm512_mask_add_round_pd(W, U, A, B, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
+                                       (__v8df)(__m512d)(W));
+
+#define _mm512_maskz_add_round_pd(U, A, B, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
+                                       (__v8df)_mm512_setzero_pd());
+
+#define _mm512_add_round_ps(A, B, R) \
+  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), (int)(R))
+
+#define _mm512_mask_add_round_ps(W, U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
+                                      (__v16sf)(__m512)(W));
+
+#define _mm512_maskz_add_round_ps(U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
+                                      (__v16sf)_mm512_setzero_ps());
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
-                (__v4sf) __B,
-                (__v4sf) __W,
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_sub_ss(__A, __B);
+  return __builtin_ia32_selectss_128(__U, __A, __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
-                (__v4sf) __B,
-                (__v4sf) _mm_setzero_ps (),
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_sub_ss(__A, __B);
+  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
 }
 
-#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
+#define _mm_sub_round_ss(A, B, R) \
   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_sub_round_ss(W, U, A, B, R) \
   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
-                                          (int)(R)); })
+                                          (int)(R))
 
-#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_sub_round_ss(U, A, B, R) \
   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df) __W,
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_sub_sd(__A, __B);
+  return __builtin_ia32_selectsd_128(__U, __A, __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df) _mm_setzero_pd (),
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_sub_sd(__A, __B);
+  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
 }
 
-#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
+#define _mm_sub_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_sub_round_sd(W, U, A, B, R) \
 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(W), \
-                                         (__mmask8)(U), (int)(R)); })
+                                         (__mmask8)(U), (int)(R))
 
-#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_sub_round_sd(U, A, B, R) \
 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)_mm_setzero_pd(), \
-                                         (__mmask8)(U), (int)(R)); })
+                                         (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_sub_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_sub_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_sub_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_sub_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)-1, (int)(R)); })
-
-#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R)); })
-
-#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(U), (int)(R)); })
-
-#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)-1, (int)(R)); })
-
-#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                       (int)(R)); });
-
-#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \
-  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(U), (int)(R)); });
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+#define _mm512_sub_round_pd(A, B, R) \
+  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), (int)(R))
+
+#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
+                                       (__v8df)(__m512d)(W));
+
+#define _mm512_maskz_sub_round_pd(U, A, B, R) \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
+                                       (__v8df)_mm512_setzero_pd());
+
+#define _mm512_sub_round_ps(A, B, R) \
+  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), (int)(R))
+
+#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
+                                      (__v16sf)(__m512)(W));
+
+#define _mm512_maskz_sub_round_ps(U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
+                                      (__v16sf)_mm512_setzero_ps());
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
-                (__v4sf) __B,
-                (__v4sf) __W,
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_mul_ss(__A, __B);
+  return __builtin_ia32_selectss_128(__U, __A, __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
-                (__v4sf) __B,
-                (__v4sf) _mm_setzero_ps (),
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_mul_ss(__A, __B);
+  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
 }
 
-#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
+#define _mm_mul_round_ss(A, B, R) \
   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)-1, (int)(R)); })
+                                          (__mmask8)-1, (int)(R))
 
-#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_mul_round_ss(W, U, A, B, R) \
   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
-                                          (int)(R)); })
+                                          (int)(R))
 
-#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_mul_round_ss(U, A, B, R) \
   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df) __W,
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_mul_sd(__A, __B);
+  return __builtin_ia32_selectsd_128(__U, __A, __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df) _mm_setzero_pd (),
-                (__mmask8) __U,
-                _MM_FROUND_CUR_DIRECTION);
+  __A = _mm_mul_sd(__A, __B);
+  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
 }
 
-#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
+#define _mm_mul_round_sd(A, B, R) \
 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)_mm_setzero_pd(), \
-                                         (__mmask8)-1, (int)(R)); })
+                                         (__mmask8)-1, (int)(R))
 
-#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
+#define _mm_mask_mul_round_sd(W, U, A, B, R) \
 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(W), \
-                                         (__mmask8)(U), (int)(R)); })
+                                         (__mmask8)(U), (int)(R))
 
-#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
+#define _mm_maskz_mul_round_sd(U, A, B, R) \
 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)_mm_setzero_pd(), \
-                                         (__mmask8)(U), (int)(R)); })
+                                         (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_mul_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_mul_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_mul_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_mul_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)-1, (int)(R)); })
-
-#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
-  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R)); })
-
-#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
- 
(__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) - -#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \ - (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) - -#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \ - (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }); - -#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \ - (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }); - -static __inline__ __m128 __DEFAULT_FN_ATTRS +#define _mm512_mul_round_pd(A, B, R) \ + (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(R)) + +#define _mm512_mask_mul_round_pd(W, U, A, B, R) \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ + (__v8df)(__m512d)(W)); + +#define _mm512_maskz_mul_round_pd(U, A, B, R) \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ + (__v8df)_mm512_setzero_pd()); + +#define _mm512_mul_round_ps(A, B, R) \ + (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(R)) + +#define _mm512_mask_mul_round_ps(W, U, A, B, R) \ + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ + (__v16sf)(__m512)(W)); + +#define _mm512_maskz_mul_round_ps(U, A, B, R) \ + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ + (__v16sf)_mm512_setzero_ps()); + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + __A = _mm_div_ss(__A, __B); + return __builtin_ia32_selectss_128(__U, __A, __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) _mm_setzero_ps (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + __A = _mm_div_ss(__A, __B); + return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } -#define _mm_div_round_ss(A, B, R) __extension__ ({ \ +#define _mm_div_round_ss(A, B, R) \ (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_div_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_div_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 
__U,__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + __A = _mm_div_sd(__A, __B); + return __builtin_ia32_selectsd_128(__U, __A, __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) _mm_setzero_pd (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + __A = _mm_div_sd(__A, __B); + return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } -#define _mm_div_round_sd(A, B, R) __extension__ ({ \ +#define _mm_div_round_sd(A, B, R) \ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_div_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_div_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a/(__v8df)__b); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_div_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_div_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a/(__v16sf)__b); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_div_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_div_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -#define _mm512_div_round_pd(A, B, R) __extension__ ({ \ - (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) - -#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \ - (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) - -#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \ - (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ 
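`_mm512_div_pd` and `_mm512_div_ps` above are now expressed as the plain vector `/` operator on the element-typed views, so the compiler can treat them like ordinary arithmetic (constant folding, reassociation under the usual FP rules). A sketch of the equivalence; `v8df` here is a local helper typedef, not part of the header:

```c
#include <immintrin.h>
#include <stdio.h>

typedef double v8df __attribute__((vector_size(64)));  /* local helper */

int main(void) {
    __m512d a = _mm512_set1_pd(9.0), b = _mm512_set1_pd(4.0);
    __m512d r = _mm512_div_pd(a, b);
    v8df    s = (v8df)a / (v8df)b;     /* what the intrinsic now expands to */

    double x[8], y[8];
    _mm512_storeu_pd(x, r);
    _mm512_storeu_pd(y, (__m512d)s);
    printf("%g %g\n", x[0], y[0]);     /* 2.25 2.25 */
    return 0;
}
```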
- (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) - -#define _mm512_div_round_ps(A, B, R) __extension__ ({ \ - (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) - -#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \ - (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }); - -#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \ - (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }); - -#define _mm512_roundscale_ps(A, B) __extension__ ({ \ +#define _mm512_div_round_pd(A, B, R) \ + (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(R)) + +#define _mm512_mask_div_round_pd(W, U, A, B, R) \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_div_round_pd((A), (B), (R)), \ + (__v8df)(__m512d)(W)); + +#define _mm512_maskz_div_round_pd(U, A, B, R) \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_div_round_pd((A), (B), (R)), \ + (__v8df)_mm512_setzero_pd()); + +#define _mm512_div_round_ps(A, B, R) \ + (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(R)) + +#define _mm512_mask_div_round_ps(W, U, A, B, R) \ + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ + (__v16sf)(__m512)(W)); + +#define _mm512_maskz_div_round_ps(U, A, B, R) \ + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ + (__v16sf)_mm512_setzero_ps()); + +#define _mm512_roundscale_ps(A, B) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ - (__v16sf)(__m512)(A), (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + (__v16sf)_mm512_undefined_ps(), \ + (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\ +#define _mm512_mask_roundscale_ps(A, B, C, imm) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ (__v16sf)(__m512)(A), (__mmask16)(B), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\ +#define _mm512_maskz_roundscale_ps(A, B, imm) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(A), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \ +#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ (__v16sf)(__m512)(A), (__mmask16)(B), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \ +#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(A), (int)(R)); }) + (__mmask16)(A), (int)(R)) -#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \ +#define _mm512_roundscale_round_ps(A, imm, R) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_roundscale_pd(A, B) 
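Two things change in the roundscale macros above: the statement-expression wrappers go away, and the unmasked form passes `_mm512_undefined_ps()` instead of `A` as the passthrough operand, which is sound because an all-ones mask never observes the passthrough. For reference, `vrndscale`'s immediate packs the precision `M` (round to a multiple of 2^-M) in bits 7:4 and the rounding behavior in the low nibble (SSE4 encodings: 0 nearest, 1 down, 2 up, 3 truncate). A sketch, with the immediates reflecting my reading of that encoding:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 x = _mm512_set1_ps(2.71828f);

    __m512 f = _mm512_roundscale_ps(x, 0x01);  /* M=0, round down: floor  */
    __m512 h = _mm512_roundscale_ps(x, 0x11);  /* M=1: multiples of 0.5   */

    float out[16];
    _mm512_storeu_ps(out, f); printf("%g\n", out[0]);  /* 2   */
    _mm512_storeu_ps(out, h); printf("%g\n", out[0]);  /* 2.5 */
    return 0;
}
```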
__extension__ ({ \ +#define _mm512_roundscale_pd(A, B) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ - (__v8df)(__m512d)(A), (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) - -#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\ + (__v8df)_mm512_undefined_pd(), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_roundscale_pd(A, B, C, imm) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ (__v8df)(__m512d)(A), (__mmask8)(B), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\ +#define _mm512_maskz_roundscale_pd(A, B, imm) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(A), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \ +#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ (__v8df)(__m512d)(A), (__mmask8)(B), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \ +#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(A), (int)(R)); }) + (__mmask8)(A), (int)(R)) -#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \ +#define _mm512_roundscale_round_pd(A, imm, R) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fmadd_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), (__mmask8)-1, \ - (int)(R)); }) + (__v8df)(__m512d)(C), \ + (__mmask8)-1, (int)(R)) -#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fmsub_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) 
__extension__ ({ \ +#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fnmadd_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), (__mmask8)-1, \ - (int)(R)); }) + (__v8df)(__m512d)(C), \ + (__mmask8)-1, (int)(R)) -#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fnmsub_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2716,7 +2528,7 @@ _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2726,7 +2538,7 @@ _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, @@ -2736,7 +2548,7 @@ _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, @@ -2746,7 +2558,7 @@ _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2756,7 +2568,7 @@ _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d 
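The fnmadd/fnmsub rewrites above move the negation from the first multiplicand to the second (`-(A)*B` becomes `A*(-B)`). For IEEE-754 multiplication the two forms are interchangeable: negating either factor flips the sign of the product in every case, including zeros and infinities, so the change is behavior-preserving. A scalar spot check with the C99 `fma` (link with `-lm`):

```c
#include <math.h>
#include <stdio.h>

int main(void) {
    double a = -0.0, b = 3.5, c = 0.25;
    double r1 = fma(-a, b, c);   /* -(a*b) + c, negation on a */
    double r2 = fma(a, -b, c);   /* -(a*b) + c, negation on b */
    printf("%g %g %d\n", r1, r2, r1 == r2);   /* 0.25 0.25 1 */
    return 0;
}
```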
__DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2766,7 +2578,7 @@ _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, @@ -2776,17 +2588,17 @@ _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) { - return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, - (__v8df) __B, + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + -(__v8df) __B, (__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, @@ -2796,7 +2608,7 @@ _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, @@ -2806,17 +2618,17 @@ _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) { - return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, - (__v8df) __B, + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + -(__v8df) __B, -(__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, @@ -2826,91 +2638,91 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fmadd_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), (__mmask16)-1, \ - (int)(R)); }) + (__v16sf)(__m512)(C), \ + (__mmask16)-1, (int)(R)) -#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ 
(__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fmsub_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \ - (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), (__mmask16)-1, \ - (int)(R)); }) +#define _mm512_fnmadd_round_ps(A, B, C, R) \ + (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + -(__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)-1, (int)(R)) -#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ - (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ +#define _mm512_fnmsub_round_ps(A, B, C, R) \ + (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + -(__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2920,7 +2732,7 @@ _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2930,7 +2742,7 @@ _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, 
__mmask16 __U) { return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, @@ -2940,7 +2752,7 @@ _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, @@ -2950,7 +2762,7 @@ _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2960,7 +2772,7 @@ _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2970,7 +2782,7 @@ _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, @@ -2980,17 +2792,17 @@ _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) { - return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, - (__v16sf) __B, + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + -(__v16sf) __B, (__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, @@ -3000,7 +2812,7 @@ _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, @@ -3010,17 +2822,17 @@ _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) { - return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, - (__v16sf) __B, + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + -(__v16sf) __B, -(__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, @@ -3030,96 +2842,96 @@ _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fmaddsub_round_pd(A, B, C, R) \ 
(__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fmsubadd_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + 
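`fmaddsub` alternates per lane: even lanes compute `a*b - c`, odd lanes `a*b + c` (`fmsubadd` is the mirror image). A sketch, with lane 0 being the lowest element:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(2.0);
    __m512d b = _mm512_set1_pd(3.0);
    __m512d c = _mm512_set1_pd(1.0);

    __m512d r = _mm512_fmaddsub_pd(a, b, c);

    double out[8];
    _mm512_storeu_pd(out, r);
    printf("%g %g\n", out[0], out[1]);   /* 5 7: 2*3-1 then 2*3+1 */
    return 0;
}
```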
_MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, @@ -3129,7 +2941,7 @@ _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, @@ -3139,7 +2951,7 @@ _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, @@ -3149,56 +2961,56 @@ _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fmaddsub_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fmsubadd_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, @@ -3208,7 +3020,7 @@ _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ 
__m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, @@ -3218,7 +3030,7 @@ _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, @@ -3228,7 +3040,7 @@ _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, @@ -3238,7 +3050,7 @@ _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, @@ -3248,7 +3060,7 @@ _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, @@ -3258,7 +3070,7 @@ _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, @@ -3268,337 +3080,309 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) - -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return 
(__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ - (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) +#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ + (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ + -(__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + -(__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ - (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) +#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ + (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + -(__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + -(__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 
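From here on the dedicated `vfnmadd*`/`vfnmsub*` builtins are retired; the same operations are spelled as the generic `vfmadd*`/`vfmsub*` builtins with operands negated at the source level. One detail worth keeping straight when reading the mask forms: `mask` variants keep `A` in inactive lanes, while `mask3` variants keep `C`. A sketch contrasting the two:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(2.0);
    __m512d b = _mm512_set1_pd(3.0);
    __m512d c = _mm512_set1_pd(10.0);
    __mmask8 u = 0x01;                 /* only lane 0 active */

    __m512d m1 = _mm512_mask_fnmadd_pd(a, u, b, c);   /* inactive: A */
    __m512d m3 = _mm512_mask3_fnmadd_pd(a, b, c, u);  /* inactive: C */

    double x[8], y[8];
    _mm512_storeu_pd(x, m1);
    _mm512_storeu_pd(y, m3);
    /* lane 0: -(2*3) + 10 = 4 in both; lane 1 differs */
    printf("%g %g | %g %g\n", x[0], x[1], y[0], y[1]);  /* 4 2 | 4 10 */
    return 0;
}
```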
- (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) +#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ + (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ + -(__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R)) -#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \ - (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)); }) +#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ + (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + -(__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS512 +_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) -{ - return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} +#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ + (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + -(__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R)) -#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ - (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \ + +#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ + (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) - - -#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ - (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + -(__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } /* Vector permutations */ -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i 
__DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I - /* idx */ , - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I, + (__v16si) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, + __m512i __B) +{ + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), + (__v16si)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, - __m512i __I, __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I - /* idx */ , - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), + (__v16si)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A, - __m512i __I, __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I - /* idx */ , - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), + (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I - /* idx */ , - (__v8di) __A, - (__v8di) __B, - (__mmask8) -1); + return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I, + (__v8di) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, - __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I - /* idx */ , - (__v8di) __A, - (__v8di) __B, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), + (__v8di)__A); } +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, + __m512i __B) +{ + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), + (__v8di)__I); +} -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, - __m512i __I, __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I - /* idx */ , - (__v8di) __A, - (__v8di) __B, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), + (__v8di)_mm512_setzero_si512()); } -#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \ - (__v8di)(__m512i)(A), \ - ((int)(I) & 0x7) 
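`permutex2var` now builds on the unmasked `vpermi2var` builtin plus a select; the newly added `mask2` variants keep the index vector `I` in inactive lanes (visible in the `(__v16si)__I` else-source above). Each 32-bit index selects from the 32-element concatenation of `A` (indices 0..15) and `B` (indices 16..31). A sketch interleaving the two sources:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(100);
    __m512i b = _mm512_set1_epi32(200);
    /* even result lanes from A (index < 16), odd lanes from B */
    __m512i idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19,
                                    4, 20, 5, 21, 6, 22, 7, 23);

    __m512i r = _mm512_permutex2var_epi32(a, idx, b);

    int out[16];
    _mm512_storeu_si512(out, r);
    printf("%d %d\n", out[0], out[1]);   /* 100 200 */
    return 0;
}
```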
+ 0, \ - ((int)(I) & 0x7) + 1, \ - ((int)(I) & 0x7) + 2, \ - ((int)(I) & 0x7) + 3, \ - ((int)(I) & 0x7) + 4, \ - ((int)(I) & 0x7) + 5, \ - ((int)(I) & 0x7) + 6, \ - ((int)(I) & 0x7) + 7); }) +#define _mm512_alignr_epi64(A, B, I) \ + (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I)) -#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\ +#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ - (__v8di)(__m512i)(W)); }) + (__v8di)(__m512i)(W)) -#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\ +#define _mm512_maskz_alignr_epi64(U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()); }) - -#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \ - (__v16si)(__m512i)(A), \ - ((int)(I) & 0xf) + 0, \ - ((int)(I) & 0xf) + 1, \ - ((int)(I) & 0xf) + 2, \ - ((int)(I) & 0xf) + 3, \ - ((int)(I) & 0xf) + 4, \ - ((int)(I) & 0xf) + 5, \ - ((int)(I) & 0xf) + 6, \ - ((int)(I) & 0xf) + 7, \ - ((int)(I) & 0xf) + 8, \ - ((int)(I) & 0xf) + 9, \ - ((int)(I) & 0xf) + 10, \ - ((int)(I) & 0xf) + 11, \ - ((int)(I) & 0xf) + 12, \ - ((int)(I) & 0xf) + 13, \ - ((int)(I) & 0xf) + 14, \ - ((int)(I) & 0xf) + 15); }) - -#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\ + (__v8di)_mm512_setzero_si512()) + +#define _mm512_alignr_epi32(A, B, I) \ + (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I)) + +#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ - (__v16si)(__m512i)(W)); }) + (__v16si)(__m512i)(W)) -#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\ +#define _mm512_maskz_alignr_epi32(U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()); }) + (__v16si)_mm512_setzero_si512()) /* Vector Extract */ -#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)_mm512_undefined_pd(), \ - ((I) & 1) ? 4 : 0, \ - ((I) & 1) ? 5 : 1, \ - ((I) & 1) ? 6 : 2, \ - ((I) & 1) ? 
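`_mm512_alignr_epi64` switches from an eight-way `__builtin_shufflevector` to the dedicated `valignq` builtin. Semantically it concatenates `A` (high) with `B` (low) and shifts right by `I` 64-bit elements, so lane `j` of the result is element `j + I` of the sequence `b0..b7,a0..a7`. A sketch with `I = 2`:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
    __m512i b = _mm512_setr_epi64( 0,  1,  2,  3,  4,  5,  6,  7);

    /* sequence b0..b7,a0..a7 shifted right by two elements */
    __m512i r = _mm512_alignr_epi64(a, b, 2);

    long long out[8];
    _mm512_storeu_si512(out, r);
    printf("%lld %lld %lld\n", out[0], out[6], out[7]);  /* 2 10 11 */
    return 0;
}
```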
7 : 3); }) - -#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ - (__v4df)(W)); }) - -#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ - (__v4df)_mm256_setzero_pd()); }) - -#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ - (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_undefined_ps(), \ - 0 + ((I) & 0x3) * 4, \ - 1 + ((I) & 0x3) * 4, \ - 2 + ((I) & 0x3) * 4, \ - 3 + ((I) & 0x3) * 4); }) - -#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\ - (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ - (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ - (__v4sf)(W)); }) - -#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\ - (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ - (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ - (__v4sf)_mm_setzero_ps()); }) +#define _mm512_extractf64x4_pd(A, I) \ + (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ + (__v4df)_mm256_undefined_pd(), \ + (__mmask8)-1) + +#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ + (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ + (__v4df)(__m256d)(W), \ + (__mmask8)(U)) + +#define _mm512_maskz_extractf64x4_pd(U, A, imm) \ + (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(U)) + +#define _mm512_extractf32x4_ps(A, I) \ + (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ + (__v4sf)_mm_undefined_ps(), \ + (__mmask8)-1) + +#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ + (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U)) + +#define _mm512_maskz_extractf32x4_ps(U, A, imm) \ + (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U)) /* Vector Blend */ -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -3606,7 +3390,7 @@ _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) (__v8df) __A); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, @@ -3614,7 +3398,7 @@ _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) (__v16sf) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, @@ -3622,7 +3406,7 @@ _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) (__v8di) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, @@ -3632,15 +3416,15 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Compare */ -#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ +#define _mm512_cmp_round_ps_mask(A, B, P, R) \ 
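The extract macros likewise trade `__builtin_shufflevector` for the dedicated extract builtins, which take the passthrough and mask directly, so the mask/maskz forms no longer need a separate select. `I` picks which 256-bit (for `f64x4`) or 128-bit (for `f32x4`) chunk comes out. A sketch:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d v = _mm512_setr_pd(0, 1, 2, 3, 4, 5, 6, 7);

    __m256d hi = _mm512_extractf64x4_pd(v, 1);   /* elements 4..7 */

    double out[4];
    _mm256_storeu_pd(out, hi);
    printf("%g %g\n", out[0], out[3]);           /* 4 7 */
    return 0;
}
```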
(__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(P), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ +#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \ (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(P), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) #define _mm512_cmp_ps_mask(A, B, P) \ _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) @@ -3687,15 +3471,15 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) #define _mm512_mask_cmpord_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) -#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ +#define _mm512_cmp_round_pd_mask(A, B, P, R) \ (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(P), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \ +#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \ (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(P), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) #define _mm512_cmp_pd_mask(A, B, P) \ _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) @@ -3744,23 +3528,23 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Conversion */ -#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundps_epu32(A, R) \ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, @@ -3770,7 +3554,7 @@ _mm512_cvttps_epu32(__m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, @@ -3779,7 +3563,7 @@ _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, @@ -3788,70 +3572,65 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepi32_ps(A, R) \ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define 
_mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepu32_ps(A, R) \ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps (__m512i __A) { - return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, - (__v16sf) _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_convertvector((__v16su)__A, __v16sf); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) { - return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, - (__v16sf) __W, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_cvtepu32_ps(__A), + (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) { - return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, - (__v16sf) _mm512_setzero_ps (), - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_cvtepu32_ps(__A), + (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A) { return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -3859,7 +3638,7 @@ _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -3867,52 +3646,47 @@ _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A) { return (__m512d) 
_mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) { return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps (__m512i __A) { - return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, - (__v16sf) _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_convertvector((__v16si)__A, __v16sf); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) { - return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, - (__v16sf) __W, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_cvtepi32_ps(__A), + (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) { - return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, - (__v16sf) _mm512_setzero_ps (), - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_cvtepi32_ps(__A), + (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A) { return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -3920,7 +3694,7 @@ _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -3928,34 +3702,34 @@ _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A) { return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) { return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); } -#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_ps(A, R) \ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m256 
__DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps (__m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3964,7 +3738,7 @@ _mm512_cvtpd_ps (__m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3973,7 +3747,7 @@ _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3982,7 +3756,7 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo (__m512d __A) { return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), @@ -3990,7 +3764,7 @@ _mm512_cvtpd_pslo (__m512d __A) 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) { return (__m512) __builtin_shufflevector ( @@ -4000,53 +3774,53 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \ +#define _mm512_cvt_roundps_ph(A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_undefined_si256(), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)(__m256i)(U), \ - (__mmask16)(W)); }) + (__mmask16)(W)) -#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_ph(W, A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(W)); }) + (__mmask16)(W)) -#define _mm512_cvtps_ph(A, I) __extension__ ({ \ +#define _mm512_cvtps_ph(A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \ +#define _mm512_mask_cvtps_ph(U, W, A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)(__m256i)(U), \ - (__mmask16)(W)); }) + (__mmask16)(W)) -#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\ +#define _mm512_maskz_cvtps_ph(W, A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(W)); }) + (__mmask16)(W)) -#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundph_ps(A, R) \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) 
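/* Illustrative sketch, not part of the upstream diff: every _mm512_mask_* /
 * _mm512_maskz_* form in this header is a per-lane select on the unmasked
 * result, which is exactly what the __builtin_ia32_select*_512 calls in the
 * new versions encode.  A scalar model of _mm512_mask_cvtepi32_ps(W, U, A),
 * using plain arrays as stand-ins for the 16-lane vectors (the helper name
 * is ours, for illustration only): */
static inline void mask_cvtepi32_ps_model(float dst[16], const float W[16],
                                          unsigned short U, const int A[16]) {
  for (int i = 0; i < 16; ++i)
    dst[i] = ((U >> i) & 1) ? (float)A[i]   /* mask bit set: converted lane   */
                            : W[i];         /* clear: passthrough (maskz: 0)  */
}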
-#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundph_ps(U, A, R) \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, @@ -4056,7 +3830,7 @@ _mm512_cvtph_ps(__m256i __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, @@ -4065,7 +3839,7 @@ _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, @@ -4074,22 +3848,22 @@ _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundpd_epi32(A, R) \ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a) { return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, @@ -4098,7 +3872,7 @@ _mm512_cvttpd_epi32(__m512d __a) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, @@ -4107,7 +3881,7 @@ _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, @@ -4116,22 +3890,22 @@ _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundps_epi32(A, R) \ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \ +#define 
_mm512_maskz_cvtt_roundps_epi32(U, A, R) \ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a) { return (__m512i) @@ -4140,7 +3914,7 @@ _mm512_cvttps_epi32(__m512 __a) (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, @@ -4149,7 +3923,7 @@ _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, @@ -4158,22 +3932,22 @@ _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_epi32(A, R) \ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32 (__m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, @@ -4182,7 +3956,7 @@ _mm512_cvtps_epi32 (__m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, @@ -4191,7 +3965,7 @@ _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, @@ -4201,22 +3975,22 @@ _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_epi32(A, R) \ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \ 
(__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32 (__m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, @@ -4226,7 +4000,7 @@ _mm512_cvtpd_epi32 (__m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, @@ -4235,7 +4009,7 @@ _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, @@ -4245,32 +4019,32 @@ _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_epu32(A, R) \ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32 ( __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ (__v16si)\ - _mm512_undefined_epi32 (),\ + _mm512_undefined_epi32 (), (__mmask16) -1,\ - _MM_FROUND_CUR_DIRECTION);\ + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, @@ -4279,7 +4053,7 @@ _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, @@ -4289,22 +4063,22 @@ _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_epu32(A, R) \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ - (__v8si)(W), \ - (__mmask8)(U), (int)(R)); }) + (__v8si)(__m256i)(W), \ + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \ +#define 
_mm512_maskz_cvt_roundpd_epu32(U, A, R) \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32 (__m512d __A) { return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, @@ -4314,7 +4088,7 @@ _mm512_cvtpd_epu32 (__m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, @@ -4323,7 +4097,7 @@ _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, @@ -4333,13 +4107,13 @@ _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a) { return __a[0]; } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a) { return __a[0]; @@ -4347,14 +4121,14 @@ _mm512_cvtss_f32(__m512 __a) /* Unpack and Interleave */ -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -4362,7 +4136,7 @@ _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -4370,14 +4144,14 @@ _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) (__v8df)_mm512_setzero_pd()); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -4385,7 +4159,7 @@ _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -4393,7 +4167,7 @@ _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b) { return 
(__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, @@ -4403,7 +4177,7 @@ _mm512_unpackhi_ps(__m512 __a, __m512 __b) 2+12, 18+12, 3+12, 19+12); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, @@ -4411,7 +4185,7 @@ _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, @@ -4419,7 +4193,7 @@ _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) (__v16sf)_mm512_setzero_ps()); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, @@ -4429,7 +4203,7 @@ _mm512_unpacklo_ps(__m512 __a, __m512 __b) 0+12, 16+12, 1+12, 17+12); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, @@ -4437,7 +4211,7 @@ _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, @@ -4445,7 +4219,7 @@ _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, @@ -4455,7 +4229,7 @@ _mm512_unpackhi_epi32(__m512i __A, __m512i __B) 2+12, 18+12, 3+12, 19+12); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, @@ -4463,7 +4237,7 @@ _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, @@ -4471,7 +4245,7 @@ _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, @@ -4481,7 +4255,7 @@ _mm512_unpacklo_epi32(__m512i __A, __m512i __B) 0+12, 16+12, 1+12, 17+12); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, @@ -4489,7 +4263,7 @@ _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i 
__DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, @@ -4497,14 +4271,14 @@ _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, @@ -4512,7 +4286,7 @@ _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, @@ -4520,14 +4294,14 @@ _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, @@ -4535,7 +4309,7 @@ _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, @@ -4546,16 +4320,16 @@ _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) /* SIMD load ops */ -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512 (void const *__P) { - return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + struct __loadu_si512 { + __m512i __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_si512*)__P)->__v; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, @@ -4564,7 +4338,7 @@ _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P, @@ -4573,7 +4347,7 @@ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, @@ -4581,7 +4355,7 @@ _mm512_mask_loadu_epi64 
(__m512i __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, @@ -4590,7 +4364,7 @@ _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, @@ -4598,7 +4372,7 @@ _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, @@ -4607,7 +4381,7 @@ _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, @@ -4615,7 +4389,7 @@ _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, @@ -4624,7 +4398,7 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p) { struct __loadu_pd { @@ -4633,7 +4407,7 @@ _mm512_loadu_pd(void const *__p) return ((struct __loadu_pd*)__p)->__v; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p) { struct __loadu_ps { @@ -4642,16 +4416,13 @@ _mm512_loadu_ps(void const *__p) return ((struct __loadu_ps*)__p)->__v; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p) { - return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1); + return *(__m512*)__p; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, @@ -4659,7 +4430,7 @@ _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, @@ -4668,16 +4439,13 @@ _mm512_maskz_load_ps(__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p) { - return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) -1); + return *(__m512d*)__p; } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) 
__builtin_ia32_loadapd512_mask ((const __v8df *) __P, @@ -4685,7 +4453,7 @@ _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, @@ -4694,19 +4462,19 @@ _mm512_maskz_load_pd(__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512 (void const *__P) { return *(__m512i *) __P; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32 (void const *__P) { return *(__m512i *) __P; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64 (void const *__P) { return *(__m512i *) __P; @@ -4714,90 +4482,98 @@ _mm512_load_epi64 (void const *__P) /* SIMD store ops */ -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A, (__mmask8) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512 (void *__P, __m512i __A) { - __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, - (__mmask16) -1); + struct __storeu_si512 { + __m512i __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_si512*)__P)->__v = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, (__mmask16) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A) { - __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1); + struct __storeu_pd { + __m512d __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pd*)__P)->__v = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, (__mmask16) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A) { - __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1); + struct __storeu_ps { + __m512 __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_ps*)__P)->__v = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A) { *(__m512d*)__P = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, 
(__mmask16) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A) { *(__m512*)__P = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512 (void *__P, __m512i __A) { *(__m512i *) __P = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32 (void *__P, __m512i __A) { *(__m512i *) __P = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64 (void *__P, __m512i __A) { *(__m512i *) __P = __A; @@ -4913,7 +4689,7 @@ _mm512_knot(__mmask16 __M) #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char @@ -4921,7 +4697,7 @@ _mm512_cvtepi8_epi32(__m128i __A) return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4929,7 +4705,7 @@ _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4937,7 +4713,7 @@ _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char @@ -4945,7 +4721,7 @@ _mm512_cvtepi8_epi64(__m128i __A) return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4953,7 +4729,7 @@ _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4961,13 +4737,13 @@ _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) (__v8di)_mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4975,7 +4751,7 @@ _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ 
-4983,13 +4759,13 @@ _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4997,7 +4773,7 @@ _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5005,13 +4781,13 @@ _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) (__v16si)_mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5019,7 +4795,7 @@ _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5027,13 +4803,13 @@ _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A) { return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5041,7 +4817,7 @@ _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5049,13 +4825,13 @@ _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A) { return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5063,7 +4839,7 @@ _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5071,13 +4847,13 @@ _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 
(__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5085,7 +4861,7 @@ _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5093,13 +4869,13 @@ _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5107,7 +4883,7 @@ _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5115,13 +4891,13 @@ _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5129,7 +4905,7 @@ _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5137,228 +4913,195 @@ _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_rorv_epi32(__A, __B), + (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) 
__builtin_ia32_prorvd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_rorv_epi32(__A, __B), + (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_rorv_epi64(__A, __B), + (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_rorv_epi64(__A, __B), + (__v8di)_mm512_setzero_si512()); } -#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epi32_mask(a, b, p) \ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epu32_mask(a, b, p) \ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epi64_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epu64_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)(m)); }) - -#define _mm512_rol_epi32(a, b) __extension__ ({ \ - (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1); }) - 
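/* Illustrative sketch, not part of the upstream diff: _mm512_rol_epi32 /
 * _mm512_ror_epi32 rotate each 32-bit lane by an immediate count; the new
 * __builtin_ia32_prold512 / __builtin_ia32_prord512 forms compute the
 * unmasked rotate, and the mask/maskz variants blend it through the select
 * builtins as elsewhere in this header.  A scalar model of one rotated-left
 * lane (helper name is ours; the count is taken modulo the lane width): */
#include <stdint.h>
static inline uint32_t rol32_model(uint32_t x, unsigned b) {
  b &= 31;                                    /* rotate count wraps mod 32    */
  return b ? (x << b) | (x >> (32 - b)) : x;  /* b==0 guarded: shift-by-32 is
                                                 undefined behavior in C      */
}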
-#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \ - (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) - -#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \ - (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) - -#define _mm512_rol_epi64(a, b) __extension__ ({ \ - (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1); }) - -#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \ - (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ - (__v8di)(__m512i)(W), (__mmask8)(U)); }) - -#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \ - (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)); }) -static __inline__ __m512i __DEFAULT_FN_ATTRS + (__mmask8)(m)) + +#define _mm512_rol_epi32(a, b) \ + (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)) + +#define _mm512_mask_rol_epi32(W, U, a, b) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_rol_epi32((a), (b)), \ + (__v16si)(__m512i)(W)) + +#define _mm512_maskz_rol_epi32(U, a, b) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_rol_epi32((a), (b)), \ + (__v16si)_mm512_setzero_si512()) + +#define _mm512_rol_epi64(a, b) \ + (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)) + +#define _mm512_mask_rol_epi64(W, U, a, b) \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_rol_epi64((a), (b)), \ + (__v8di)(__m512i)(W)) + +#define _mm512_maskz_rol_epi64(U, a, b) \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_rol_epi64((a), (b)), \ + (__v8di)_mm512_setzero_si512()) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_rolv_epi32(__A, __B), + (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_rolv_epi32(__A, __B), + (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 
__m512i __B) { - return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_rolv_epi64(__A, __B), + (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_rolv_epi64(__A, __B), + (__v8di)_mm512_setzero_si512()); } -#define _mm512_ror_epi32(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1); }) +#define _mm512_ror_epi32(A, B) \ + (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)) -#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) +#define _mm512_mask_ror_epi32(W, U, A, B) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_ror_epi32((A), (B)), \ + (__v16si)(__m512i)(W)) -#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) +#define _mm512_maskz_ror_epi32(U, A, B) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_ror_epi32((A), (B)), \ + (__v16si)_mm512_setzero_si512()) -#define _mm512_ror_epi64(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1); }) +#define _mm512_ror_epi64(A, B) \ + (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)) -#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)(__m512i)(W), (__mmask8)(U)); }) +#define _mm512_mask_ror_epi64(W, U, A, B) \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_ror_epi64((A), (B)), \ + (__v8di)(__m512i)(W)) -#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)); }) +#define _mm512_maskz_ror_epi64(U, A, B) \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_ror_epi64((A), (B)), \ + (__v8di)_mm512_setzero_si512()) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, int __B) { return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5366,20 +5109,20 @@ _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_slli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static 
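The rotate-by-immediate forms stay macros because the underlying `__builtin_ia32_prold512` / `__builtin_ia32_prorq512` builtins require the count to be an integer constant expression; only the merge and zero masking moved out into selects. A short usage sketch (hypothetical harness, AVX-512F assumed), where masked-off lanes keep the pass-through operand:

```c
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t src[16], out[16];
    for (int i = 0; i < 16; ++i) src[i] = 0xABCD0000u | i;

    __m512i v = _mm512_loadu_si512(src);
    __m512i w = _mm512_set1_epi32(-1);      /* pass-through values */

    /* Rotate left by a constant 8; lanes 8..15 keep w because their mask
       bit is clear (merge masking via the select the macro expands to). */
    __m512i r = _mm512_mask_rol_epi32(w, (__mmask16)0x00FF, v, 8);

    _mm512_storeu_si512(out, r);
    for (int i = 0; i < 16; ++i)
        printf("%2d: %08x\n", i, out[i]);   /* ffffffff in lanes 8..15 */
    return 0;
}
```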
__inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5387,7 +5130,7 @@ _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5395,13 +5138,13 @@ _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5409,20 +5152,20 @@ _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5430,7 +5173,7 @@ _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5438,7 +5181,7 @@ _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, @@ -5446,7 +5189,7 @@ _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, @@ -5455,14 +5198,14 @@ _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, @@ -5470,7 +5213,7 @@ _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) (__v16si) __W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, @@ -5478,7 +5221,7 @@ _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) (__v16si) _mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, @@ -5486,7 +5229,7 @@ _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) (__v8di) __W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, @@ -5494,7 +5237,7 @@ _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) (__v8di) _mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, @@ -5502,7 +5245,7 @@ _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, @@ -5511,21 +5254,21 @@ _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd (__m512d __A) { return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, 0, 0, 2, 2, 4, 4, 6, 6); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -5533,7 +5276,7 @@ _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -5541,179 +5284,179 @@ _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) (__v8df)_mm512_setzero_pd()); } -#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \ +#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \ +#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - 
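Unlike the rotates, the masked loads and stores above keep their dedicated builtins: a masked load has to suppress faults on inactive lanes, which a plain load-then-select sequence would not. They map to the aligned `vmovdqa32` / `vmovdqa64` forms, so the pointer should be 64-byte aligned. A sketch (hypothetical harness, AVX-512F assumed):

```c
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* vmovdqa32 forms: buffers must be 64-byte aligned. */
    _Alignas(64) int32_t in[16], out[16];
    for (int i = 0; i < 16; ++i) { in[i] = i + 1; out[i] = -1; }

    __mmask16 m = 0x0F0F;
    __m512i v = _mm512_maskz_load_epi32(m, in);  /* inactive lanes read as 0 */
    _mm512_mask_store_epi32(out, m, v);          /* inactive lanes untouched */

    for (int i = 0; i < 16; ++i)
        printf("%2d: %d\n", i, out[i]);          /* -1 wherever the bit is 0 */
    return 0;
}
```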
(__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \ +#define _mm512_fixupimm_pd(A, B, C, imm) \ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ +#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \ +#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), \ (int)(imm), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ +#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), \ (int)(imm), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \ +#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \ +#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \ +#define _mm512_fixupimm_ps(A, B, C, imm) \ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ +#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \ +#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), \ (int)(imm), (__mmask16)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ +#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), \ (int)(imm), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \ +#define _mm_fixupimm_round_sd(A, B, C, imm, R) \ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define 
_mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \ +#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \ +#define _mm_fixupimm_sd(A, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \ +#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \ +#define _mm_fixupimm_round_ss(A, B, C, imm, R) \ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \ +#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \ +#define _mm_fixupimm_ss(A, B, C, imm) \ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \ +#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \ +#define _mm_getexp_round_sd(A, B, R) \ 
(__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, @@ -5723,13 +5466,13 @@ _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_getexp_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, @@ -5739,26 +5482,26 @@ _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\ +#define _mm_maskz_getexp_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \ +#define _mm_getexp_round_ss(A, B, R) \ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, @@ -5768,117 +5511,117 @@ _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_getexp_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, - (__v4sf) _mm_setzero_pd (), + (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\ +#define _mm_maskz_getexp_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ 
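`vgetexpsd`, wrapped just above, returns the unbiased exponent of the second operand's low lane as a double, roughly floor(log2(|x|)), with the upper lane copied from the first operand. A minimal sketch (hypothetical, AVX-512F assumed):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d upper = _mm_set_sd(0.0);   /* source of the untouched high lane */
    __m128d x     = _mm_set_sd(10.0);
    double e = _mm_cvtsd_f64(_mm_getexp_sd(upper, x));
    printf("getexp(10.0) = %g\n", e);  /* 3, since 2^3 <= 10 < 2^4 */
    return 0;
}
```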
(__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \ +#define _mm_getmant_round_sd(A, B, C, D, R) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \ +#define _mm_getmant_sd(A, B, C, D) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\ +#define _mm_mask_getmant_sd(W, U, A, B, C, D) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\ +#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\ +#define _mm_maskz_getmant_sd(U, A, B, C, D) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\ +#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \ +#define _mm_getmant_round_ss(A, B, C, D, R) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \ +#define _mm_getmant_ss(A, B, C, D) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\ +#define _mm_mask_getmant_ss(W, U, A, B, C, D) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\ +#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\ +#define _mm_maskz_getmant_ss(U, A, B, C, D) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ - (__v4sf)_mm_setzero_pd(), \ + 
(__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\ +#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov (__mmask16 __A) @@ -5886,37 +5629,26 @@ _mm512_kmov (__mmask16 __A) return __A; } -#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\ +#define _mm_comi_round_sd(A, B, P, R) \ (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ - (int)(P), (int)(R)); }) + (int)(P), (int)(R)) -#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\ +#define _mm_comi_round_ss(A, B, P, R) \ (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ - (int)(P), (int)(R)); }) + (int)(P), (int)(R)) #ifdef __x86_64__ -#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_si64(A, R) \ + (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, - __mmask16 __U, __m512i __B) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sll_epi32(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, - (__v16si) __I - /* idx */ , - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sll_epi32(__m512i __A, __m128i __B) -{ - return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5924,7 +5656,7 @@ _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5932,13 +5664,13 @@ _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5946,7 +5678,7 @@ _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5954,13 +5686,13 @@ _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i 
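The getmant forms a few hunks up are the companion operation: they renormalize |x| into a chosen interval, so getmant and getexp together behave like a vectorized `frexp`. A sketch (hypothetical, AVX-512F assumed; `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_src` are the enum values the header defines for the interval and sign controls):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d x = _mm_set_sd(10.0);
    /* Normalize the mantissa into [1, 2), keeping the source sign. */
    __m128d m = _mm_getmant_sd(_mm_setzero_pd(), x,
                               _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
    printf("getmant(10.0) = %g\n", _mm_cvtsd_f64(m)); /* 1.25 = 10 / 2^3 */
    return 0;
}
```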
__DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5968,7 +5700,7 @@ _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5976,13 +5708,13 @@ _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5990,7 +5722,7 @@ _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5998,13 +5730,13 @@ _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -6012,7 +5744,7 @@ _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -6020,13 +5752,13 @@ _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6034,7 +5766,7 @@ _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6042,13 +5774,13 @@ _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i 
__DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -6056,7 +5788,7 @@ _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -6064,13 +5796,13 @@ _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6078,7 +5810,7 @@ _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6086,13 +5818,13 @@ _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -6100,7 +5832,7 @@ _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -6108,13 +5840,13 @@ _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6122,7 +5854,7 @@ _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6130,13 +5862,13 @@ _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) 
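The variable-shift family (`sllv` / `srav` / `srlv`) follows the same select-based masking pattern; each 32- or 64-bit lane is shifted by the count held in the corresponding lane of the second vector. A sketch of the arithmetic variant (hypothetical, AVX-512F assumed):

```c
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    int32_t x[16], n[16], out[16];
    for (int i = 0; i < 16; ++i) { x[i] = -256; n[i] = i; }

    /* Arithmetic shift: the sign bit is replicated into vacated positions. */
    __m512i r = _mm512_srav_epi32(_mm512_loadu_si512(x),
                                  _mm512_loadu_si512(n));
    _mm512_storeu_si512(out, r);

    for (int i = 0; i < 16; ++i)
        printf("-256 >> %2d = %d\n", i, out[i]);
    return 0;
}
```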
(__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -6144,7 +5876,7 @@ _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -6152,13 +5884,13 @@ _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64 (__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6166,7 +5898,7 @@ _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6174,57 +5906,57 @@ _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) (__v8di)_mm512_setzero_si512()); } -#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ +#define _mm512_ternarylogic_epi32(A, B, C, imm) \ (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ +#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)(U)); }) + (__mmask16)(U)) -#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ +#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), \ (__v16si)(__m512i)(C), \ - (int)(imm), (__mmask16)(U)); }) + (int)(imm), (__mmask16)(U)) -#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ +#define _mm512_ternarylogic_epi64(A, B, C, imm) \ (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ +#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ +#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ 
(__v8di)(__m512i)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #ifdef __x86_64__ -#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_i64(A, R) \ + (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) #endif -#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_si32(A, R) \ + (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) -#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_i32(A, R) \ + (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) -#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \ - (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_u32(A, R) \ + (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32 (__m128d __A) { return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, @@ -6232,11 +5964,11 @@ _mm_cvtsd_u32 (__m128d __A) } #ifdef __x86_64__ -#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \ +#define _mm_cvt_roundsd_u64(A, R) \ (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ - (int)(R)); }) + (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtsd_u64 (__m128d __A) { return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) @@ -6245,24 +5977,24 @@ _mm_cvtsd_u64 (__m128d __A) } #endif -#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_si32(A, R) \ + (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) -#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_i32(A, R) \ + (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) #ifdef __x86_64__ -#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_si64(A, R) \ + (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) -#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_i64(A, R) \ + (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) #endif -#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \ - (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_u32(A, R) \ + (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32 (__m128 __A) { return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, @@ -6270,11 +6002,11 @@ _mm_cvtss_u32 (__m128 __A) } #ifdef __x86_64__ -#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \ +#define _mm_cvt_roundss_u64(A, R) \ (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ - (int)(R)); }) + (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 
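`vpternlogd`, wrapped by the `_mm512_ternarylogic_epi32` macros just above, evaluates an arbitrary three-input boolean function per bit: the immediate is an 8-entry truth table indexed by the bit triple (a, b, c). For example, 0x96 encodes three-way XOR. A sketch (hypothetical, AVX-512F assumed):

```c
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(0x0F0F0F0F);
    __m512i b = _mm512_set1_epi32(0x00FF00FF);
    __m512i c = _mm512_set1_epi32(0x33333333);

    /* 0x96: truth-table bit (a<<2 | b<<1 | c) is set for odd parity, i.e.
       a ^ b ^ c, the "sum" half of a carry-save adder in one instruction. */
    uint32_t out[16];
    _mm512_storeu_si512(out, _mm512_ternarylogic_epi32(a, b, c, 0x96));

    uint32_t ref = 0x0F0F0F0Fu ^ 0x00FF00FFu ^ 0x33333333u;
    printf("vpternlogd 0x96 -> %08x (reference %08x)\n", out[0], ref);
    return 0;
}
```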
_mm_cvtss_u64 (__m128 __A) { return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) @@ -6283,13 +6015,13 @@ _mm_cvtss_u64 (__m128 __A) } #endif -#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_i32(A, R) \ + (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) -#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_si32(A, R) \ + (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32 (__m128d __A) { return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, @@ -6297,13 +6029,13 @@ _mm_cvttsd_i32 (__m128d __A) } #ifdef __x86_64__ -#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_si64(A, R) \ + (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) -#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_i64(A, R) \ + (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_i64 (__m128d __A) { return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, @@ -6311,10 +6043,10 @@ _mm_cvttsd_i64 (__m128d __A) } #endif -#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \ - (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_u32(A, R) \ + (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32 (__m128d __A) { return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, @@ -6322,11 +6054,11 @@ _mm_cvttsd_u32 (__m128d __A) } #ifdef __x86_64__ -#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \ +#define _mm_cvtt_roundsd_u64(A, R) \ (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ - (int)(R)); }) + (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_u64 (__m128d __A) { return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) @@ -6335,13 +6067,13 @@ _mm_cvttsd_u64 (__m128d __A) } #endif -#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_i32(A, R) \ + (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) -#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_si32(A, R) \ + (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32 (__m128 __A) { return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, @@ -6349,13 +6081,13 @@ _mm_cvttss_i32 (__m128 __A) } #ifdef __x86_64__ -#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_i64(A, R) \ + (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) -#define 
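The `_cvt_round*` / `_cvtt_round*` macros in this stretch expose the EVEX embedded rounding and SAE controls: the `_cvt_round` forms take a rounding mode OR'ed with `_MM_FROUND_NO_EXC`, while the `_cvtt` forms always truncate and accept only `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC`. A sketch (hypothetical, AVX-512F assumed):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d x = _mm_set_sd(2.5);
    int up = _mm_cvt_roundsd_si32(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    int dn = _mm_cvt_roundsd_si32(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    int tr = _mm_cvtt_roundsd_si32(x, _MM_FROUND_NO_EXC);
    printf("2.5 -> ceil %d, floor %d, trunc %d\n", up, dn, tr); /* 3, 2, 2 */
    return 0;
}
```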
_mm_cvtt_roundss_si64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_si64(A, R) \ + (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttss_i64 (__m128 __A) { return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, @@ -6363,10 +6095,10 @@ _mm_cvttss_i64 (__m128 __A) } #endif -#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \ - (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_u32(A, R) \ + (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32 (__m128 __A) { return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, @@ -6374,11 +6106,11 @@ _mm_cvttss_u32 (__m128 __A) } #ifdef __x86_64__ -#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \ +#define _mm_cvtt_roundss_u64(A, R) \ (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ - (int)(R)); }) + (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttss_u64 (__m128 __A) { return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) @@ -6387,98 +6119,39 @@ _mm_cvttss_u64 (__m128 __A) } #endif -static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U, - __m512d __B) -{ - return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, - (__v8di) __I - /* idx */ , - (__v8df) __B, - (__mmask8) __U); -} - -static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, - __m512 __B) -{ - return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, - (__v16si) __I - /* idx */ , - (__v16sf) __B, - (__mmask16) __U); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, - __mmask8 __U, __m512i __B) -{ - return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A, - (__v8di) __I - /* idx */ , - (__v8di) __B, - (__mmask8) __U); -} - -#define _mm512_permute_pd(X, C) __extension__ ({ \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ - (__v8df)_mm512_undefined_pd(), \ - 0 + (((C) >> 0) & 0x1), \ - 0 + (((C) >> 1) & 0x1), \ - 2 + (((C) >> 2) & 0x1), \ - 2 + (((C) >> 3) & 0x1), \ - 4 + (((C) >> 4) & 0x1), \ - 4 + (((C) >> 5) & 0x1), \ - 6 + (((C) >> 6) & 0x1), \ - 6 + (((C) >> 7) & 0x1)); }) +#define _mm512_permute_pd(X, C) \ + (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)) -#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ +#define _mm512_mask_permute_pd(W, U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permute_pd((X), (C)), \ - (__v8df)(__m512d)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \ +#define _mm512_maskz_permute_pd(U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permute_pd((X), (C)), \ - (__v8df)_mm512_setzero_pd()); }) - -#define _mm512_permute_ps(X, C) __extension__ ({ \ - (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ - (__v16sf)_mm512_undefined_ps(), \ - 0 + (((C) >> 0) & 0x3), \ - 0 + (((C) >> 2) & 0x3), \ - 0 + (((C) >> 4) & 0x3), \ - 0 + (((C) >> 6) & 0x3), \ - 4 + (((C) >> 0) & 0x3), \ - 4 + (((C) 
>> 2) & 0x3), \ - 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3), \ - 8 + (((C) >> 0) & 0x3), \ - 8 + (((C) >> 2) & 0x3), \ - 8 + (((C) >> 4) & 0x3), \ - 8 + (((C) >> 6) & 0x3), \ - 12 + (((C) >> 0) & 0x3), \ - 12 + (((C) >> 2) & 0x3), \ - 12 + (((C) >> 4) & 0x3), \ - 12 + (((C) >> 6) & 0x3)); }) - -#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ + (__v8df)_mm512_setzero_pd()) + +#define _mm512_permute_ps(X, C) \ + (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)) + +#define _mm512_mask_permute_ps(W, U, X, C) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_permute_ps((X), (C)), \ - (__v16sf)(__m512)(W)); }) + (__v16sf)(__m512)(W)) -#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \ +#define _mm512_maskz_permute_ps(U, X, C) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_permute_ps((X), (C)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -6486,7 +6159,7 @@ _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -6494,13 +6167,13 @@ _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C) { return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -6508,7 +6181,7 @@ _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -6516,85 +6189,87 @@ _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) { - return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I - /* idx */ , - (__v8df) __A, - (__v8df) __B, - (__mmask8) -1); + return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I, + (__v8df)__B); } -static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) +static __inline__ __m512d __DEFAULT_FN_ATTRS512 +_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) { - return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I - /* idx */ , - (__v8df) __A, - (__v8df) __B, 
- (__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512(__U, + (__v8df)_mm512_permutex2var_pd(__A, __I, __B), + (__v8df)__A); } -static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, - __m512d __B) +static __inline__ __m512d __DEFAULT_FN_ATTRS512 +_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, + __m512d __B) { - return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I - /* idx */ , - (__v8df) __A, - (__v8df) __B, - (__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512(__U, + (__v8df)_mm512_permutex2var_pd(__A, __I, __B), + (__v8df)(__m512d)__I); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 +_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, + __m512d __B) +{ + return (__m512d)__builtin_ia32_selectpd_512(__U, + (__v8df)_mm512_permutex2var_pd(__A, __I, __B), + (__v8df)_mm512_setzero_pd()); +} + +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) { - return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I - /* idx */ , - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) -1); + return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I, + (__v16sf) __B); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) +{ + return (__m512)__builtin_ia32_selectps_512(__U, + (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), + (__v16sf)__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) { - return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I - /* idx */ , - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) __U); + return (__m512)__builtin_ia32_selectps_512(__U, + (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), + (__v16sf)(__m512)__I); } -static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, - __m512 __B) +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) { - return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I - /* idx */ , - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) __U); + return (__m512)__builtin_ia32_selectps_512(__U, + (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), + (__v16sf)_mm512_setzero_ps()); } -#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundpd_epu32(A, R) \ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 
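The permutex2var rework above is the same inversion as the rotates: clang 8 keeps only the unmasked `vpermi2*` builtin and derives the `mask`, `maskz`, and `mask2` forms via selects. Note the `mask2` variants pass the index vector through on inactive lanes, which is why their select fallback is `__I` bitcast to the element type. Each index lane picks a source lane, with bit 4 choosing between the two inputs. A sketch (hypothetical, AVX-512F assumed):

```c
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    float a[16], b[16], out[16];
    int32_t idx[16];
    for (int i = 0; i < 16; ++i) { a[i] = (float)i; b[i] = 100.0f + i; }
    /* Indices 0,16,1,17,...: bit 4 set selects from b, clear selects from a. */
    for (int i = 0; i < 8; ++i) { idx[2 * i] = i; idx[2 * i + 1] = 16 + i; }

    __m512 r = _mm512_permutex2var_ps(_mm512_loadu_ps(a),
                                      _mm512_loadu_si512(idx),
                                      _mm512_loadu_ps(b));
    _mm512_storeu_ps(out, r);
    for (int i = 0; i < 16; ++i)
        printf("%g ", out[i]);   /* 0 100 1 101 2 102 ... */
    puts("");
    return 0;
}
```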
_mm512_cvttpd_epu32 (__m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, @@ -6604,7 +6279,7 @@ _mm512_cvttpd_epu32 (__m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, @@ -6613,7 +6288,7 @@ _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, @@ -6623,109 +6298,109 @@ _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \ +#define _mm_roundscale_round_sd(A, B, imm, R) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(imm), \ - (int)(R)); }) + (int)(R)) -#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \ +#define _mm_roundscale_sd(A, B, imm) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(imm), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \ +#define _mm_mask_roundscale_sd(W, U, A, B, imm) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(imm), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \ +#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(I), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \ +#define _mm_maskz_roundscale_sd(U, A, B, I) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(I), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \ +#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(I), \ - (int)(R)); }) + (int)(R)) -#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \ +#define _mm_roundscale_round_ss(A, B, imm, R) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(imm), \ - (int)(R)); }) + (int)(R)) -#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \ +#define _mm_roundscale_ss(A, B, imm) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(imm), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \ +#define _mm_mask_roundscale_ss(W, U, A, B, I) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), 
(int)(I), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \ +#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(I), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \ +#define _mm_maskz_roundscale_ss(U, A, B, I) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(I), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \ +#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(I), \ - (int)(R)); }) + (int)(R)) -#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \ +#define _mm512_scalef_round_pd(A, B, R) \ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_scalef_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd (__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, @@ -6736,7 +6411,7 @@ _mm512_scalef_pd (__m512d __A, __m512d __B) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, @@ -6746,7 +6421,7 @@ _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, @@ -6757,25 +6432,25 @@ _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \ +#define _mm512_scalef_round_ps(A, B, R) \ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \ +#define 
_mm512_maskz_scalef_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps (__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, @@ -6786,7 +6461,7 @@ _mm512_scalef_ps (__m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, @@ -6796,7 +6471,7 @@ _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, @@ -6807,13 +6482,13 @@ _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \ +#define _mm_scalef_round_sd(A, B, R) \ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, @@ -6822,7 +6497,7 @@ _mm_scalef_sd (__m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, @@ -6832,13 +6507,13 @@ _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_scalef_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, @@ -6848,19 +6523,19 @@ _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_scalef_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \ +#define _mm_scalef_round_ss(A, B, R) \ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, @@ -6869,7 
+6544,7 @@ _mm_scalef_ss (__m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, @@ -6879,13 +6554,13 @@ _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_scalef_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, @@ -6895,211 +6570,147 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_scalef_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) { - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ - (__v16si)_mm512_srai_epi32(__A, __B), \ + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_srai_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) { - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ - (__v16si)_mm512_srai_epi32(__A, __B), \ + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_srai_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) { - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ - (__v8di)_mm512_srai_epi64(__A, __B), \ + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_srai_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) { - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ - (__v8di)_mm512_srai_epi64(__A, __B), \ + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_srai_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \ - (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - 0 + ((((imm) >> 0) & 0x3) * 4), \ - 1 + ((((imm) >> 0) & 0x3) * 4), \ - 2 + ((((imm) 
>> 0) & 0x3) * 4), \ - 3 + ((((imm) >> 0) & 0x3) * 4), \ - 0 + ((((imm) >> 2) & 0x3) * 4), \ - 1 + ((((imm) >> 2) & 0x3) * 4), \ - 2 + ((((imm) >> 2) & 0x3) * 4), \ - 3 + ((((imm) >> 2) & 0x3) * 4), \ - 16 + ((((imm) >> 4) & 0x3) * 4), \ - 17 + ((((imm) >> 4) & 0x3) * 4), \ - 18 + ((((imm) >> 4) & 0x3) * 4), \ - 19 + ((((imm) >> 4) & 0x3) * 4), \ - 16 + ((((imm) >> 6) & 0x3) * 4), \ - 17 + ((((imm) >> 6) & 0x3) * 4), \ - 18 + ((((imm) >> 6) & 0x3) * 4), \ - 19 + ((((imm) >> 6) & 0x3) * 4)); }) - -#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_shuffle_f32x4(A, B, imm) \ + (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(imm)) + +#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ - (__v16sf)(__m512)(W)); }) + (__v16sf)(__m512)(W)) -#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()); }) - -#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - 0 + ((((imm) >> 0) & 0x3) * 2), \ - 1 + ((((imm) >> 0) & 0x3) * 2), \ - 0 + ((((imm) >> 2) & 0x3) * 2), \ - 1 + ((((imm) >> 2) & 0x3) * 2), \ - 8 + ((((imm) >> 4) & 0x3) * 2), \ - 9 + ((((imm) >> 4) & 0x3) * 2), \ - 8 + ((((imm) >> 6) & 0x3) * 2), \ - 9 + ((((imm) >> 6) & 0x3) * 2)); }) - -#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ + (__v16sf)_mm512_setzero_ps()) + +#define _mm512_shuffle_f64x2(A, B, imm) \ + (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(imm)) + +#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ - (__v8df)(__m512d)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ - (__v8df)_mm512_setzero_pd()); }) - -#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - 0 + ((((imm) >> 0) & 0x3) * 2), \ - 1 + ((((imm) >> 0) & 0x3) * 2), \ - 0 + ((((imm) >> 2) & 0x3) * 2), \ - 1 + ((((imm) >> 2) & 0x3) * 2), \ - 8 + ((((imm) >> 4) & 0x3) * 2), \ - 9 + ((((imm) >> 4) & 0x3) * 2), \ - 8 + ((((imm) >> 6) & 0x3) * 2), \ - 9 + ((((imm) >> 6) & 0x3) * 2)); }) - -#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ + (__v8df)_mm512_setzero_pd()) + +#define _mm512_shuffle_i32x4(A, B, imm) \ + (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(imm)) + +#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ - (__v16si)(__m512i)(W)); }) + (__v16si)(__m512i)(W)) -#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()); }) - -#define _mm512_shuffle_i64x2(A, B, imm) 
__extension__ ({ \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - 0 + ((((imm) >> 0) & 0x3) * 2), \ - 1 + ((((imm) >> 0) & 0x3) * 2), \ - 0 + ((((imm) >> 2) & 0x3) * 2), \ - 1 + ((((imm) >> 2) & 0x3) * 2), \ - 8 + ((((imm) >> 4) & 0x3) * 2), \ - 9 + ((((imm) >> 4) & 0x3) * 2), \ - 8 + ((((imm) >> 6) & 0x3) * 2), \ - 9 + ((((imm) >> 6) & 0x3) * 2)); }) - -#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ + (__v16si)_mm512_setzero_si512()) + +#define _mm512_shuffle_i64x2(A, B, imm) \ + (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(imm)) + +#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ - (__v8di)(__m512i)(W)); }) + (__v8di)(__m512i)(W)) -#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()); }) - -#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - 0 + (((M) >> 0) & 0x1), \ - 8 + (((M) >> 1) & 0x1), \ - 2 + (((M) >> 2) & 0x1), \ - 10 + (((M) >> 3) & 0x1), \ - 4 + (((M) >> 4) & 0x1), \ - 12 + (((M) >> 5) & 0x1), \ - 6 + (((M) >> 6) & 0x1), \ - 14 + (((M) >> 7) & 0x1)); }) - -#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ + (__v8di)_mm512_setzero_si512()) + +#define _mm512_shuffle_pd(A, B, M) \ + (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(M)) + +#define _mm512_mask_shuffle_pd(W, U, A, B, M) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ - (__v8df)(__m512d)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ +#define _mm512_maskz_shuffle_pd(U, A, B, M) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ - (__v8df)_mm512_setzero_pd()); }) - -#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \ - (__m512d)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - 0 + (((M) >> 0) & 0x3), \ - 0 + (((M) >> 2) & 0x3), \ - 16 + (((M) >> 4) & 0x3), \ - 16 + (((M) >> 6) & 0x3), \ - 4 + (((M) >> 0) & 0x3), \ - 4 + (((M) >> 2) & 0x3), \ - 20 + (((M) >> 4) & 0x3), \ - 20 + (((M) >> 6) & 0x3), \ - 8 + (((M) >> 0) & 0x3), \ - 8 + (((M) >> 2) & 0x3), \ - 24 + (((M) >> 4) & 0x3), \ - 24 + (((M) >> 6) & 0x3), \ - 12 + (((M) >> 0) & 0x3), \ - 12 + (((M) >> 2) & 0x3), \ - 28 + (((M) >> 4) & 0x3), \ - 28 + (((M) >> 6) & 0x3)); }) - -#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ + (__v8df)_mm512_setzero_pd()) + +#define _mm512_shuffle_ps(A, B, M) \ + (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(M)) + +#define _mm512_mask_shuffle_ps(W, U, A, B, M) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ - (__v16sf)(__m512)(W)); }) + (__v16sf)(__m512)(W)) -#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ +#define _mm512_maskz_shuffle_ps(U, A, B, M) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \ +#define 
_mm_sqrt_round_sd(A, B, R) \ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, @@ -7109,13 +6720,13 @@ _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, @@ -7125,19 +6736,19 @@ _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_sqrt_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \ +#define _mm_sqrt_round_ss(A, B, R) \ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, @@ -7147,13 +6758,13 @@ _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, @@ -7163,13 +6774,13 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_sqrt_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, @@ -7177,7 +6788,7 @@ _mm512_broadcast_f32x4(__m128 __A) 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, @@ -7185,7 +6796,7 @@ _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) 
(__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, @@ -7193,14 +6804,14 @@ _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A) { return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, @@ -7208,7 +6819,7 @@ _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) (__v8df)__O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, @@ -7216,7 +6827,7 @@ _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, @@ -7224,7 +6835,7 @@ _mm512_broadcast_i32x4(__m128i __A) 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -7232,7 +6843,7 @@ _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -7240,14 +6851,14 @@ _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A) { return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -7255,7 +6866,7 @@ _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -7263,7 +6874,7 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, @@ -7271,7 +6882,7 @@ _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) (__v8df) __O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { return 
(__m512d)__builtin_ia32_selectpd_512(__M, @@ -7279,7 +6890,7 @@ _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) (__v8df) _mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, @@ -7287,7 +6898,7 @@ _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) (__v16sf) __O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, @@ -7295,7 +6906,7 @@ _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) (__v16sf) _mm512_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, @@ -7303,14 +6914,14 @@ _mm512_cvtsepi32_epi8 (__m512i __A) (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, @@ -7318,13 +6929,13 @@ _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, @@ -7332,14 +6943,14 @@ _mm512_cvtsepi32_epi16 (__m512i __A) (__mmask16) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, (__v16hi) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, @@ -7347,13 +6958,13 @@ _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, @@ -7361,14 +6972,14 @@ _mm512_cvtsepi64_epi8 (__m512i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i 
__DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, @@ -7376,13 +6987,13 @@ _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, @@ -7390,14 +7001,14 @@ _mm512_cvtsepi64_epi32 (__m512i __A) (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, (__v8si) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, @@ -7405,13 +7016,13 @@ _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, @@ -7419,14 +7030,14 @@ _mm512_cvtsepi64_epi16 (__m512i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, @@ -7434,13 +7045,13 @@ _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, @@ -7448,7 +7059,7 @@ _mm512_cvtusepi32_epi8 (__m512i __A) (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, @@ -7456,7 +7067,7 @@ _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, @@ -7464,13 +7075,13 @@ _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) __M); } -static __inline__ 
void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, @@ -7478,7 +7089,7 @@ _mm512_cvtusepi32_epi16 (__m512i __A) (__mmask16) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, @@ -7486,7 +7097,7 @@ _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, @@ -7494,13 +7105,13 @@ _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, @@ -7508,7 +7119,7 @@ _mm512_cvtusepi64_epi8 (__m512i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, @@ -7516,7 +7127,7 @@ _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, @@ -7524,13 +7135,13 @@ _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, @@ -7538,14 +7149,14 @@ _mm512_cvtusepi64_epi32 (__m512i __A) (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, (__v8si) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, @@ -7553,13 +7164,13 @@ _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) { 
__builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, @@ -7567,14 +7178,14 @@ _mm512_cvtusepi64_epi16 (__m512i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, @@ -7582,13 +7193,13 @@ _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, @@ -7596,14 +7207,14 @@ _mm512_cvtepi32_epi8 (__m512i __A) (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, @@ -7611,13 +7222,13 @@ _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, @@ -7625,14 +7236,14 @@ _mm512_cvtepi32_epi16 (__m512i __A) (__mmask16) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, (__v16hi) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, @@ -7640,13 +7251,13 @@ _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, @@ -7654,14 +7265,14 @@ _mm512_cvtepi64_epi8 (__m512i __A) 
(__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, @@ -7669,13 +7280,13 @@ _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, @@ -7683,14 +7294,14 @@ _mm512_cvtepi64_epi32 (__m512i __A) (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, (__v8si) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, @@ -7698,13 +7309,13 @@ _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, @@ -7712,14 +7323,14 @@ _mm512_cvtepi64_epi16 (__m512i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, @@ -7727,246 +7338,192 @@ _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } -#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_undefined_epi32(), \ - 0 + ((imm) & 0x3) * 4, \ - 1 + ((imm) & 0x3) * 4, \ - 2 + ((imm) & 0x3) * 4, \ - 3 + ((imm) & 0x3) * 4); }) - -#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ - (__v4si)(W)); }) - -#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - 
(__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ - (__v4si)_mm_setzero_si128()); }) - -#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)_mm512_undefined_epi32(), \ - ((imm) & 1) ? 4 : 0, \ - ((imm) & 1) ? 5 : 1, \ - ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3); }) - -#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ - (__v4di)(W)); }) - -#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ - (__v4di)_mm256_setzero_si256()); }) - -#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7); }) - -#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_extracti32x4_epi32(A, imm) \ + (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v4si)_mm_undefined_si128(), \ + (__mmask8)-1) + +#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ + (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v4si)(__m128i)(W), \ + (__mmask8)(U)) + +#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ + (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v4si)_mm_setzero_si128(), \ + (__mmask8)(U)) + +#define _mm512_extracti64x4_epi64(A, imm) \ + (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ + (__v4di)_mm256_undefined_si256(), \ + (__mmask8)-1) + +#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ + (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ + (__v4di)(__m256i)(W), \ + (__mmask8)(U)) + +#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \ + (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ + (__v4di)_mm256_setzero_si256(), \ + (__mmask8)(U)) + +#define _mm512_insertf64x4(A, B, imm) \ + (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ + (__v4df)(__m256d)(B), (int)(imm)) + +#define _mm512_mask_insertf64x4(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ - (__v8df)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_insertf64x4(U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ - (__v8df)_mm512_setzero_pd()); }) - -#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)_mm512_castsi256_si512((__m256i)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 
11 : 7); }) - -#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \ + (__v8df)_mm512_setzero_pd()) + +#define _mm512_inserti64x4(A, B, imm) \ + (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ + (__v4di)(__m256i)(B), (int)(imm)) + +#define _mm512_mask_inserti64x4(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ - (__v8di)(W)); }) + (__v8di)(__m512i)(W)) -#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_inserti64x4(U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()); }) - -#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \ - (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_castps128_ps512((__m128)(B)),\ - (((imm) & 0x3) == 0) ? 16 : 0, \ - (((imm) & 0x3) == 0) ? 17 : 1, \ - (((imm) & 0x3) == 0) ? 18 : 2, \ - (((imm) & 0x3) == 0) ? 19 : 3, \ - (((imm) & 0x3) == 1) ? 16 : 4, \ - (((imm) & 0x3) == 1) ? 17 : 5, \ - (((imm) & 0x3) == 1) ? 18 : 6, \ - (((imm) & 0x3) == 1) ? 19 : 7, \ - (((imm) & 0x3) == 2) ? 16 : 8, \ - (((imm) & 0x3) == 2) ? 17 : 9, \ - (((imm) & 0x3) == 2) ? 18 : 10, \ - (((imm) & 0x3) == 2) ? 19 : 11, \ - (((imm) & 0x3) == 3) ? 16 : 12, \ - (((imm) & 0x3) == 3) ? 17 : 13, \ - (((imm) & 0x3) == 3) ? 18 : 14, \ - (((imm) & 0x3) == 3) ? 19 : 15); }) - -#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ + (__v8di)_mm512_setzero_si512()) + +#define _mm512_insertf32x4(A, B, imm) \ + (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ + (__v4sf)(__m128)(B), (int)(imm)) + +#define _mm512_mask_insertf32x4(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ - (__v16sf)(W)); }) + (__v16sf)(__m512)(W)) -#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_insertf32x4(U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()); }) - -#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_castsi128_si512((__m128i)(B)),\ - (((imm) & 0x3) == 0) ? 16 : 0, \ - (((imm) & 0x3) == 0) ? 17 : 1, \ - (((imm) & 0x3) == 0) ? 18 : 2, \ - (((imm) & 0x3) == 0) ? 19 : 3, \ - (((imm) & 0x3) == 1) ? 16 : 4, \ - (((imm) & 0x3) == 1) ? 17 : 5, \ - (((imm) & 0x3) == 1) ? 18 : 6, \ - (((imm) & 0x3) == 1) ? 19 : 7, \ - (((imm) & 0x3) == 2) ? 16 : 8, \ - (((imm) & 0x3) == 2) ? 17 : 9, \ - (((imm) & 0x3) == 2) ? 18 : 10, \ - (((imm) & 0x3) == 2) ? 19 : 11, \ - (((imm) & 0x3) == 3) ? 16 : 12, \ - (((imm) & 0x3) == 3) ? 17 : 13, \ - (((imm) & 0x3) == 3) ? 18 : 14, \ - (((imm) & 0x3) == 3) ? 
19 : 15); }) - -#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ + (__v16sf)_mm512_setzero_ps()) + +#define _mm512_inserti32x4(A, B, imm) \ + (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ + (__v4si)(__m128i)(B), (int)(imm)) + +#define _mm512_mask_inserti32x4(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ - (__v16si)(W)); }) + (__v16si)(__m512i)(W)) -#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_inserti32x4(U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()); }) + (__v16si)_mm512_setzero_si512()) -#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_getmant_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \ +#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_getmant_pd(A, B, C) __extension__ ({ \ +#define _mm512_getmant_pd(A, B, C) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm512_mask_getmant_pd(W, U, A, B, C) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ +#define _mm512_maskz_getmant_pd(U, A, B, C) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_getmant_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \ +#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_getmant_ps(A, B, C) __extension__ ({ \ +#define 
_mm512_getmant_ps(A, B, C) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm512_mask_getmant_ps(W, U, A, B, C) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ +#define _mm512_maskz_getmant_ps(U, A, B, C) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_getexp_round_pd(A, R) __extension__ ({ \ +#define _mm512_getexp_round_pd(A, R) \ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_getexp_round_pd(W, U, A, R) \ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_getexp_round_pd(U, A, R) \ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd (__m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, @@ -7975,7 +7532,7 @@ _mm512_getexp_pd (__m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, @@ -7984,7 +7541,7 @@ _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, @@ -7993,22 +7550,22 @@ _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_getexp_round_ps(A, R) __extension__ ({ \ +#define _mm512_getexp_round_ps(A, R) \ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_getexp_round_ps(W, U, A, R) \ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_getexp_round_ps(U, A, R) \ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps (__m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, @@ -8017,7 +7574,7 
@@ _mm512_getexp_ps (__m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, @@ -8026,7 +7583,7 @@ _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, @@ -8035,721 +7592,731 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \ +#define _mm512_i64gather_ps(index, addr, scale) \ (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ (float const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\ +#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ (float const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\ - (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \ +#define _mm512_i64gather_epi32(index, addr, scale) \ + (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ (int const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)-1, (int)(scale)); }) + (__mmask8)-1, (int)(scale)) -#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ (int const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\ +#define _mm512_i64gather_pd(index, addr, scale) \ (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ (double const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ (double const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\ - (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \ +#define _mm512_i64gather_epi64(index, addr, scale) \ + (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ (long long const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ (long long const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\ +#define 
_mm512_i32gather_ps(index, addr, scale) \ (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ (float const *)(addr), \ (__v16sf)(__m512)(index), \ - (__mmask16)-1, (int)(scale)); }) + (__mmask16)-1, (int)(scale)) -#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ (float const *)(addr), \ (__v16sf)(__m512)(index), \ - (__mmask16)(mask), (int)(scale)); }) + (__mmask16)(mask), (int)(scale)) -#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\ +#define _mm512_i32gather_epi32(index, addr, scale) \ (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ (int const *)(addr), \ (__v16si)(__m512i)(index), \ - (__mmask16)-1, (int)(scale)); }) + (__mmask16)-1, (int)(scale)) -#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \ (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ (int const *)(addr), \ (__v16si)(__m512i)(index), \ - (__mmask16)(mask), (int)(scale)); }) + (__mmask16)(mask), (int)(scale)) -#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\ +#define _mm512_i32gather_pd(index, addr, scale) \ (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ (double const *)(addr), \ (__v8si)(__m256i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \ (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ (double const *)(addr), \ (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\ +#define _mm512_i32gather_epi64(index, addr, scale) \ (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ (long long const *)(addr), \ (__v8si)(__m256i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \ (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ (long long const *)(addr), \ (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i64scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ - (__v8sf)(__m256)(v1), (int)(scale)); }) + (__v8sf)(__m256)(v1), (int)(scale)) -#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ - (__v8sf)(__m256)(v1), (int)(scale)); }) + (__v8sf)(__m256)(v1), (int)(scale)) -#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i64scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ - (__v8si)(__m256i)(v1), (int)(scale)); }) + (__v8si)(__m256i)(v1), (int)(scale)) -#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) 
__extension__ ({\ +#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ - (__v8si)(__m256i)(v1), (int)(scale)); }) + (__v8si)(__m256i)(v1), (int)(scale)) -#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i64scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ - (__v8df)(__m512d)(v1), (int)(scale)); }) + (__v8df)(__m512d)(v1), (int)(scale)) -#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ - (__v8df)(__m512d)(v1), (int)(scale)); }) + (__v8df)(__m512d)(v1), (int)(scale)) -#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i64scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ - (__v8di)(__m512i)(v1), (int)(scale)); }) + (__v8di)(__m512i)(v1), (int)(scale)) -#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ - (__v8di)(__m512i)(v1), (int)(scale)); }) + (__v8di)(__m512i)(v1), (int)(scale)) -#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i32scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \ (__v16si)(__m512i)(index), \ - (__v16sf)(__m512)(v1), (int)(scale)); }) + (__v16sf)(__m512)(v1), (int)(scale)) -#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \ (__v16si)(__m512i)(index), \ - (__v16sf)(__m512)(v1), (int)(scale)); }) + (__v16sf)(__m512)(v1), (int)(scale)) -#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i32scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \ (__v16si)(__m512i)(index), \ - (__v16si)(__m512i)(v1), (int)(scale)); }) + (__v16si)(__m512i)(v1), (int)(scale)) -#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \ (__v16si)(__m512i)(index), \ - (__v16si)(__m512i)(v1), (int)(scale)); }) + (__v16si)(__m512i)(v1), (int)(scale)) -#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i32scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \ (__v8si)(__m256i)(index), \ - (__v8df)(__m512d)(v1), (int)(scale)); }) + (__v8df)(__m512d)(v1), (int)(scale)) -#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \ (__v8si)(__m256i)(index), \ - (__v8df)(__m512d)(v1), (int)(scale)); }) + (__v8df)(__m512d)(v1), (int)(scale)) -#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i32scatter_epi64(addr, 
index, v1, scale) \ __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \ (__v8si)(__m256i)(index), \ - (__v8di)(__m512i)(v1), (int)(scale)); }) + (__v8di)(__m512i)(v1), (int)(scale)) -#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \ (__v8si)(__m256i)(index), \ - (__v8di)(__m512i)(v1), (int)(scale)); }) + (__v8di)(__m512i)(v1), (int)(scale)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, - (__v4sf) __A, - (__v4sf) __B, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, + (__v4sf)__A, + (__v4sf)__B, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\ +#define _mm_fmadd_round_ss(A, B, C, R) \ + (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R)) + +#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, + (__v4sf)__B, + (__v4sf)__C, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(C), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { - return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, - (__v4sf) __X, - (__v4sf) __Y, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, + (__v4sf)__X, + (__v4sf)__Y, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\ +#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, - (__v4sf) __A, - -(__v4sf) __B, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, + (__v4sf)__A, + -(__v4sf)__B, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\ +#define _mm_fmsub_round_ss(A, B, C, R) \ + (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + -(__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R)) + +#define 
_mm_mask_fmsub_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)); }) + -(__v4sf)(__m128)(B), (__mmask8)(U), \ + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, + (__v4sf)__B, + -(__v4sf)__C, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ -(__v4sf)(__m128)(C), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { - return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, - (__v4sf) __X, - (__v4sf) __Y, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, + (__v4sf)__X, + (__v4sf)__Y, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\ +#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, - -(__v4sf) __A, - (__v4sf) __B, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, + -(__v4sf)__A, + (__v4sf)__B, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\ +#define _mm_fnmadd_round_ss(A, B, C, R) \ + (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R)) + +#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, + -(__v4sf)__B, + (__v4sf)__C, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\ - (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ +#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ + (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), \ (__v4sf)(__m128)(C), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { - return (__m128) 
__builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, - (__v4sf) __X, - (__v4sf) __Y, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, + -(__v4sf)__X, + (__v4sf)__Y, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\ - (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \ - (__v4sf)(__m128)(X), \ +#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ + (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ + -(__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, - -(__v4sf) __A, - -(__v4sf) __B, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, + -(__v4sf)__A, + -(__v4sf)__B, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\ +#define _mm_fnmsub_round_ss(A, B, C, R) \ + (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), \ + -(__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R)) + +#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, + -(__v4sf)__B, + -(__v4sf)__C, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\ - (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ +#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ + (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), \ -(__v4sf)(__m128)(C), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { - return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W, - (__v4sf) __X, - (__v4sf) __Y, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, + -(__v4sf)__X, + (__v4sf)__Y, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\ - (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \ - (__v4sf)(__m128)(X), \ +#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ + (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ + -(__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, - (__v2df) __A, - (__v2df) __B, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, + (__v2df)__A, + (__v2df)__B, + 
(__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\ +#define _mm_fmadd_round_sd(A, B, C, R) \ + (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(C), (__mmask8)-1, \ + (int)(R)) + +#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, + (__v2df)__B, + (__v2df)__C, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(C), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, - (__v2df) __X, - (__v2df) __Y, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, + (__v2df)__X, + (__v2df)__Y, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\ +#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, - (__v2df) __A, - -(__v2df) __B, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, + (__v2df)__A, + -(__v2df)__B, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\ +#define _mm_fmsub_round_sd(A, B, C, R) \ + (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + -(__v2df)(__m128d)(C), (__mmask8)-1, \ + (int)(R)) + +#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, - (__v2df) __B, - -(__v2df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, + (__v2df)__B, + -(__v2df)__C, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ -(__v2df)(__m128d)(C), \ - (__mmask8)(U), 
(int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, - (__v2df) __X, - (__v2df) __Y, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, + (__v2df)__X, + (__v2df)__Y, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\ +#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, - -(__v2df) __A, - (__v2df) __B, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, + -(__v2df)__A, + (__v2df)__B, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\ +#define _mm_fnmadd_round_sd(A, B, C, R) \ + (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(C), (__mmask8)-1, \ + (int)(R)) + +#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, + -(__v2df)__B, + (__v2df)__C, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\ - (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ +#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ + (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), \ (__v2df)(__m128d)(C), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W, - (__v2df) __X, - (__v2df) __Y, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, + -(__v2df)__X, + (__v2df)__Y, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\ - (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \ - (__v2df)(__m128d)(X), \ +#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ + (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ + -(__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) 
__W, - -(__v2df) __A, - -(__v2df) __B, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, + -(__v2df)__A, + -(__v2df)__B, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\ +#define _mm_fnmsub_round_sd(A, B, C, R) \ + (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), \ + -(__v2df)(__m128d)(C), (__mmask8)-1, \ + (int)(R)) + +#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, - (__v2df) __B, - -(__v2df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, + -(__v2df)__B, + -(__v2df)__C, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\ - (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ +#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ + (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), \ -(__v2df)(__m128d)(C), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W), - (__v2df) __X, - (__v2df) (__Y), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, + -(__v2df)__X, + (__v2df)__Y, + (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\ - (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \ - (__v2df)(__m128d)(X), \ +#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ + (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ + -(__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ - (__mmask8)(U), (int)(R)); }) - -#define _mm512_permutex_pd(X, C) __extension__ ({ \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ - (__v8df)_mm512_undefined_pd(), \ - 0 + (((C) >> 0) & 0x3), \ - 0 + (((C) >> 2) & 0x3), \ - 0 + (((C) >> 4) & 0x3), \ - 0 + (((C) >> 6) & 0x3), \ - 4 + (((C) >> 0) & 0x3), \ - 4 + (((C) >> 2) & 0x3), \ - 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3)); }) - -#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ + (__mmask8)(U), (int)(R)) + +#define _mm512_permutex_pd(X, C) \ + (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)) + +#define _mm512_mask_permutex_pd(W, U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permutex_pd((X), (C)), \ - (__v8df)(__m512d)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \ +#define _mm512_maskz_permutex_pd(U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permutex_pd((X), (C)), \ - (__v8df)_mm512_setzero_pd()); }) - -#define _mm512_permutex_epi64(X, C) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ - (__v8di)_mm512_undefined_epi32(), \ - 0 + (((C) >> 0) & 0x3), \ - 0 + (((C) >> 2) & 0x3), \ - 
0 + (((C) >> 4) & 0x3), \ - 0 + (((C) >> 6) & 0x3), \ - 4 + (((C) >> 0) & 0x3), \ - 4 + (((C) >> 2) & 0x3), \ - 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3)); }) - -#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ + (__v8df)_mm512_setzero_pd()) + +#define _mm512_permutex_epi64(X, C) \ + (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)) + +#define _mm512_mask_permutex_epi64(W, U, X, C) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_permutex_epi64((X), (C)), \ - (__v8di)(__m512i)(W)); }) + (__v8di)(__m512i)(W)) -#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \ +#define _mm512_maskz_permutex_epi64(U, X, C) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_permutex_epi64((X), (C)), \ - (__v8di)_mm512_setzero_si512()); }) + (__v8di)_mm512_setzero_si512()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd (__m512i __X, __m512d __Y) { - return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, - (__v8di) __X, - (__v8df) _mm512_undefined_pd (), - (__mmask8) -1); + return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) { - return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, - (__v8di) __X, - (__v8df) __W, - (__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_permutexvar_pd(__X, __Y), + (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) { - return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, - (__v8di) __X, - (__v8df) _mm512_setzero_pd (), - (__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_permutexvar_pd(__X, __Y), + (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, - (__v8di) __X, - (__v8di) _mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, - (__v8di) __X, - (__v8di) _mm512_undefined_epi32 (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, + (__v8di)_mm512_permutexvar_epi64(__X, __Y), + (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, - (__v8di) __X, - (__v8di) __W, - __M); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, + (__v8di)_mm512_permutexvar_epi64(__X, __Y), + (__v8di)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps (__m512i __X, __m512 
__Y) { - return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, - (__v16si) __X, - (__v16sf) _mm512_undefined_ps (), - (__mmask16) -1); + return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) { - return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, - (__v16si) __X, - (__v16sf) __W, - (__mmask16) __U); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_permutexvar_ps(__X, __Y), + (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) { - return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, - (__v16si) __X, - (__v16sf) _mm512_setzero_ps (), - (__mmask16) __U); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) -{ - return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, - (__v16si) __X, - (__v16si) _mm512_setzero_si512 (), - __M); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_permutexvar_ps(__X, __Y), + (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, - (__v16si) __X, - (__v16si) _mm512_undefined_epi32 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); } #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) +{ + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, + (__v16si)_mm512_permutexvar_epi32(__X, __Y), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, - (__v16si) __X, - (__v16si) __W, - __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, + (__v16si)_mm512_permutexvar_epi32(__X, __Y), + (__v16si)__W); } #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 @@ -8784,6 +8351,24 @@ _mm512_kortestz (__mmask16 __A, __mmask16 __B) return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); } +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) +{ + return (unsigned char)__builtin_ia32_kortestchi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) +{ + return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B); + return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); +} + static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb (__mmask16 __A, __mmask16 __B) { @@ -8802,35 +8387,68 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B) return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); } +#define _kand_mask16 _mm512_kand +#define 
_kandn_mask16 _mm512_kandn +#define _knot_mask16 _mm512_knot +#define _kor_mask16 _mm512_kor +#define _kxnor_mask16 _mm512_kxnor +#define _kxor_mask16 _mm512_kxor + +#define _kshiftli_mask16(A, I) \ + (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)) + +#define _kshiftri_mask16(A, I) \ + (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)) + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_cvtmask16_u32(__mmask16 __A) { + return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_cvtu32_mask16(unsigned int __A) { + return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_load_mask16(__mmask16 *__A) { + return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A); +} + static __inline__ void __DEFAULT_FN_ATTRS +_store_mask16(__mmask16 *__A, __mmask16 __B) { + *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B); +} + +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512 (__m512i * __P, __m512i __A) { typedef __v8di __v8di_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512 (void const *__P) { typedef __v8di __v8di_aligned __attribute__((aligned(64))); return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd (double *__P, __m512d __A) { typedef __v8df __v8df_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps (float *__P, __m512 __A) { typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, @@ -8838,7 +8456,7 @@ _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, @@ -8847,7 +8465,7 @@ _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, @@ -8855,7 +8473,7 @@ _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, @@ -8864,7 +8482,7 @@ _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) (__mmask8) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 
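/* A minimal usage sketch (not part of the upstream patch) exercising the
 * 16-bit mask helpers the hunk above introduces in the clang 8 header.
 * The function name demo_mask16 is hypothetical; the intrinsics and their
 * signatures are taken from the additions above. Assumes an AVX-512F
 * target, e.g. building with -mavx512f. */
#include <immintrin.h>

unsigned int demo_mask16(__mmask16 a, __mmask16 b)
{
  /* _kortest_mask16_u8 writes the "all 16 bits of a|b set" flag through
     its third argument and returns the "a|b is all zero" flag. */
  unsigned char all_set;
  unsigned char all_zero = _kortest_mask16_u8(a, b, &all_set);

  /* Mask arithmetic stays in the k-registers: AND the masks, then shift
     left by an immediate with _kshiftli_mask16. */
  __mmask16 m = _kshiftli_mask16(_kand_mask16(a, b), 2);
  if (all_zero || all_set)
    m = _knot_mask16(m);

  /* _cvtmask16_u32 moves the mask value into a general-purpose register. */
  return _cvtmask16_u32(m);
}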
@@ -8872,7 +8490,7 @@ _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, @@ -8881,7 +8499,7 @@ _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, @@ -8889,7 +8507,7 @@ _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, @@ -8898,116 +8516,116 @@ _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) (__mmask16) __U); } -#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \ +#define _mm_cmp_round_ss_mask(X, Y, P, R) \ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \ +#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \ +#define _mm_cmp_ss_mask(X, Y, P) \ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \ +#define _mm_mask_cmp_ss_mask(M, X, Y, P) \ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)(M), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \ +#define _mm_cmp_round_sd_mask(X, Y, P, R) \ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \ +#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \ +#define _mm_cmp_sd_mask(X, Y, P) \ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \ +#define _mm_mask_cmp_sd_mask(M, X, Y, P) \ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ (__mmask8)(M), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) /* Bit Test */ -static __inline __mmask16 __DEFAULT_FN_ATTRS +static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ 
__mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } -static __inline __mmask8 __DEFAULT_FN_ATTRS +static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -9015,7 +8633,7 @@ _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -9023,14 +8641,14 @@ _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -9038,7 +8656,7 @@ _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) { return 
(__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -9046,132 +8664,94 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - __m128 res = __A; - res[0] = (__U & 1) ? __B[0] : __W[0]; - return res; + return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { - __m128 res = __A; - res[0] = (__U & 1) ? __B[0] : 0; - return res; + return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), + _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - __m128d res = __A; - res[0] = (__U & 1) ? __B[0] : __W[0]; - return res; + return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) { - __m128d res = __A; - res[0] = (__U & 1) ? __B[0] : 0; - return res; + return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), + _mm_setzero_pd()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) { - __builtin_ia32_storess128_mask ((__v16sf *)__W, - (__v16sf) _mm512_castps128_ps512(__A), - (__mmask16) __U & (__mmask16)1); + __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) { - __builtin_ia32_storesd128_mask ((__v8df *)__W, - (__v8df) _mm512_castpd128_pd512(__A), - (__mmask8) __U & 1); + __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) { __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, - (__v4sf) {0.0, 0.0, 0.0, 0.0}, + (__v4sf)_mm_setzero_ps(), 0, 4, 4, 4); - return (__m128) __builtin_shufflevector( - __builtin_ia32_loadss128_mask ((__v16sf *) __A, - (__v16sf) _mm512_castps128_ps512(src), - (__mmask16) __U & 1), - _mm512_undefined_ps(), 0, 1, 2, 3); + return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss (__mmask8 __U, const float* __A) { - return (__m128) __builtin_shufflevector( - __builtin_ia32_loadss128_mask ((__v16sf *) __A, - (__v16sf) _mm512_setzero_ps(), - (__mmask16) __U & 1), - _mm512_undefined_ps(), 0, 1, 2, 3); + return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A, + (__v4sf) _mm_setzero_ps(), + __U & 1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) { __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, - (__v2df) {0.0, 0.0}, 0, 2); + (__v2df)_mm_setzero_pd(), + 0, 2); - return (__m128d) __builtin_shufflevector( - __builtin_ia32_loadsd128_mask ((__v8df *) __A, - (__v8df) _mm512_castpd128_pd512(src), - (__mmask8) __U & 1), - 
_mm512_undefined_pd(), 0, 1); + return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd (__mmask8 __U, const double* __A) { - return (__m128d) __builtin_shufflevector( - __builtin_ia32_loadsd128_mask ((__v8df *) __A, - (__v8df) _mm512_setzero_pd(), - (__mmask8) __U & 1), - _mm512_undefined_pd(), 0, 1); -} - -#define _mm512_shuffle_epi32(A, I) __extension__ ({ \ - (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_undefined_epi32(), \ - 0 + (((I) >> 0) & 0x3), \ - 0 + (((I) >> 2) & 0x3), \ - 0 + (((I) >> 4) & 0x3), \ - 0 + (((I) >> 6) & 0x3), \ - 4 + (((I) >> 0) & 0x3), \ - 4 + (((I) >> 2) & 0x3), \ - 4 + (((I) >> 4) & 0x3), \ - 4 + (((I) >> 6) & 0x3), \ - 8 + (((I) >> 0) & 0x3), \ - 8 + (((I) >> 2) & 0x3), \ - 8 + (((I) >> 4) & 0x3), \ - 8 + (((I) >> 6) & 0x3), \ - 12 + (((I) >> 0) & 0x3), \ - 12 + (((I) >> 2) & 0x3), \ - 12 + (((I) >> 4) & 0x3), \ - 12 + (((I) >> 6) & 0x3)); }) - -#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ + return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, + (__v2df) _mm_setzero_pd(), + __U & 1); +} + +#define _mm512_shuffle_epi32(A, I) \ + (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)) + +#define _mm512_mask_shuffle_epi32(W, U, A, I) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_epi32((A), (I)), \ - (__v16si)(__m512i)(W)); }) + (__v16si)(__m512i)(W)) -#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ +#define _mm512_maskz_shuffle_epi32(U, A, I) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_epi32((A), (I)), \ - (__v16si)_mm512_setzero_si512()); }) + (__v16si)_mm512_setzero_si512()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, @@ -9179,7 +8759,7 @@ _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, @@ -9187,7 +8767,7 @@ _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, @@ -9195,15 +8775,15 @@ _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, - (__v8di) _mm512_setzero_pd (), + (__v8di) _mm512_setzero_si512 (), (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, @@ -9211,7 +8791,7 @@ _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 
_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, @@ -9219,7 +8799,7 @@ _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, @@ -9227,15 +8807,15 @@ _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, - (__v8di) _mm512_setzero_pd(), + (__v8di) _mm512_setzero_si512(), (__mmask8) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, @@ -9243,7 +8823,7 @@ _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, @@ -9251,7 +8831,7 @@ _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, @@ -9259,15 +8839,15 @@ _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, - (__v16si) _mm512_setzero_ps(), + (__v16si) _mm512_setzero_si512(), (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, @@ -9275,7 +8855,7 @@ _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, @@ -9283,7 +8863,7 @@ _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, @@ -9291,71 +8871,64 @@ _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, - (__v16si) _mm512_setzero_ps(), + (__v16si) _mm512_setzero_si512(), 
(__mmask16) __U); } -#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_pd(A, R) \ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_pd(U, A, R) \ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd (__m256 __A) { - return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) { - return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_cvtps_pd(__A), + (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) { - return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_cvtps_pd(__A), + (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd (__m512 __A) { - return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); + return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) { - return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); + return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -9363,7 +8936,7 @@ _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) (__v8df) __W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -9371,7 +8944,7 @@ _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) (__v8df) _mm512_setzero_pd ()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, @@ -9379,7 +8952,7 @@ _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) (__v16sf) __W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 
__DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, @@ -9387,68 +8960,68 @@ _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) (__v16sf) _mm512_setzero_ps ()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, (__mmask16) __U); } -#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundsd_ss(A, B, R) \ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { - return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), - (__v2df)(__B), - (__v4sf)(__W), - (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, + (__v2df)__B, + (__v4sf)__W, + (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) { - return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), - (__v2df)(__B), + return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, + (__v2df)__B, (__v4sf)_mm_setzero_ps(), - (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); + (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvtss_i32 _mm_cvtss_si32 @@ -9463,111 +9036,112 @@ _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) #endif #ifdef __x86_64__ -#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundi64_sd(A, B, R) \ (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ - (int)(R)); }) + (int)(R)) -#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundsi64_sd(A, B, R) \ (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ - (int)(R)); }) + (int)(R)) #endif -#define _mm_cvt_roundsi32_ss(A, B, R) 
__extension__ ({ \ - (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) +#define _mm_cvt_roundsi32_ss(A, B, R) \ + (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) -#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \ - (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) +#define _mm_cvt_roundi32_ss(A, B, R) \ + (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) #ifdef __x86_64__ -#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundsi64_ss(A, B, R) \ (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ - (int)(R)); }) + (int)(R)) -#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundi64_ss(A, B, R) \ (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ - (int)(R)); }) + (int)(R)) #endif -#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundss_sd(A, B, R) \ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) { - return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A), - (__v4sf)(__B), - (__v2df)(__W), - (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, + (__v4sf)__B, + (__v2df)__W, + (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) { - return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A), - (__v4sf)(__B), - (__v2df)_mm_setzero_pd(), - (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, + (__v4sf)__B, + (__v2df)_mm_setzero_pd(), + (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd (__m128d __A, unsigned __B) { - return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); + __A[0] = __B; + return __A; } #ifdef __x86_64__ -#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundu64_sd(A, B, R) \ (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ - (unsigned long long)(B), (int)(R)); }) + (unsigned long long)(B), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu64_sd (__m128d __A, unsigned long long __B) { - return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, - _MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #endif -#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundu32_ss(A, B, R) \ 
(__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ - (int)(R)); }) + (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss (__m128 __A, unsigned __B) { - return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, - _MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #ifdef __x86_64__ -#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundu64_ss(A, B, R) \ (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ - (unsigned long long)(B), (int)(R)); }) + (unsigned long long)(B), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu64_ss (__m128 __A, unsigned long long __B) { - return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, - _MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) { return (__m512i) __builtin_ia32_selectd_512(__M, @@ -9575,17 +9149,15 @@ _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) (__v16si) __O); } -#ifdef __x86_64__ -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) { return (__m512i) __builtin_ia32_selectq_512(__M, (__v8di) _mm512_set1_epi64(__A), (__v8di) __O); } -#endif -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, @@ -9609,7 +9181,7 @@ _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63}; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, @@ -9624,7 +9196,7 @@ _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32 (int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, @@ -9640,7 +9212,7 @@ _mm512_set_epi32 (int __A, int __B, int __C, int __D, _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ (e5),(e4),(e3),(e2),(e1),(e0)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64 (long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H) @@ -9652,7 +9224,7 @@ _mm512_set_epi64 (long long __A, long long __B, long long __C, #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd (double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H) { @@ -9663,7 +9235,7 @@ _mm512_set_pd (double __A, double __B, double __C, double __D, #define 
_mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps (float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, @@ -9678,556 +9250,402 @@ _mm512_set_ps (float __A, float __B, float __C, float __D, _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ (e4),(e3),(e2),(e1),(e0)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A) { return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) { return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A) { return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) { return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); } -// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as -// outputs. This class of vector operation forms the basis of many scientific -// computations. In vector-reduction arithmetic, the evaluation off is -// independent of the order of the input elements of V. - -// Used bisection method. At each step, we partition the vector with previous -// step in half, and the operation is performed on its two halves. -// This takes log2(n) steps where n is the number of elements in the vector. - -// Vec512 - Vector with size 512. -// Operator - Can be one of following: +,*,&,| -// T2 - Can get 'i' for int and 'f' for float. -// T1 - Can get 'i' for int and 'd' for double. 
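Both the removed macro below and the extract/shuffle code that replaces it later in this hunk implement the bisection reduction described in the surrounding comments: the vector is repeatedly split in half and the two halves are combined, taking log2(n) steps for n lanes. A minimal plain-C sketch of that scheme, assuming an 8-element array stands in for one 512-bit vector of 64-bit lanes (`reduce_add_8` is a hypothetical illustration, not part of the header):

```c
#include <stdio.h>

/* Bisection reduction over 8 lanes: 8 -> 4 -> 2 -> 1, i.e. log2(8) = 3 steps.
 * Each pass folds the upper half of the live region into the lower half. */
static long long reduce_add_8(long long v[8]) {
  for (int width = 4; width >= 1; width /= 2)
    for (int i = 0; i < width; ++i)
      v[i] += v[i + width];
  return v[0];
}

int main(void) {
  long long v[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  printf("%lld\n", reduce_add_8(v)); /* prints 36 */
  return 0;
}
```

The intrinsics do the same thing, with `__builtin_shufflevector` (old code) or `_mm512_extracti64x4_epi64`/`_mm256_extracti128_si256` (new code) supplying the halves.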
- -#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \ - __extension__({ \ - __m256##T1 Vec256 = __builtin_shufflevector( \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512, \ - 0, 1, 2, 3) \ - Operator \ - __builtin_shufflevector( \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512, \ - 4, 5, 6, 7); \ - __m128##T1 Vec128 = __builtin_shufflevector( \ - (__v4d##T2)Vec256, \ - (__v4d##T2)Vec256, \ - 0, 1) \ - Operator \ - __builtin_shufflevector( \ - (__v4d##T2)Vec256, \ - (__v4d##T2)Vec256, \ - 2, 3); \ - Vec128 = __builtin_shufflevector((__v2d##T2)Vec128, \ - (__v2d##T2)Vec128, 0, -1) \ - Operator \ - __builtin_shufflevector((__v2d##T2)Vec128, \ - (__v2d##T2)Vec128, 1, -1); \ - return Vec128[0]; \ - }) - -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) { - _mm512_reduce_operator_64bit(__W, +, i, i); -} - -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) { - _mm512_reduce_operator_64bit(__W, *, i, i); -} - -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) { - _mm512_reduce_operator_64bit(__W, &, i, i); -} - -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) { - _mm512_reduce_operator_64bit(__W, |, i, i); -} - -static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) { - _mm512_reduce_operator_64bit(__W, +, f, d); -} - -static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) { - _mm512_reduce_operator_64bit(__W, *, f, d); -} - -// Vec512 - Vector with size 512. -// Vec512Neutral - All vector elements set to the identity element. -// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0} -// Operator - Can be one of following: +,*,&,| -// Mask - Intrinsic Mask -// T2 - Can get 'i' for int and 'f' for float. -// T1 - Can get 'i' for int and 'd' for packed double-precision. -// T3 - Can be Pd for packed double or q for q-word. - -#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, \ - Mask, T2, T1, T3) \ - __extension__({ \ - Vec512 = __builtin_ia32_select##T3##_512( \ - (__mmask8)Mask, \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512Neutral); \ - _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1); \ - }) - -static __inline__ long long __DEFAULT_FN_ATTRS +/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as + * outputs. This class of vector operation forms the basis of many scientific + * computations. In vector-reduction arithmetic, the evaluation of f is + * independent of the order of the input elements of V. + + * We use a bisection method: at each step, we partition the vector from the + * previous step in half, and the operation is performed on its two halves. + * This takes log2(n) steps where n is the number of elements in the vector.
+ */ + +#define _mm512_mask_reduce_operator(op) \ + __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \ + __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \ + __m256i __t3 = (__m256i)(__t1 op __t2); \ + __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \ + __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \ + __v2du __t6 = __t4 op __t5; \ + __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ + __v2du __t8 = __t6 op __t7; \ + return __t8[0]; + +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { + _mm512_mask_reduce_operator(+); +} + +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { + _mm512_mask_reduce_operator(*); +} + +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { + _mm512_mask_reduce_operator(&); +} + +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { + _mm512_mask_reduce_operator(|); +} + +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { - _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q); + __W = _mm512_maskz_mov_epi64(__M, __W); + _mm512_mask_reduce_operator(+); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { - _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q); + __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W); + _mm512_mask_reduce_operator(*); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { - _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), - &, __M, i, i, q); + __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W); + _mm512_mask_reduce_operator(&); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { - _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M, - i, i, q); + __W = _mm512_maskz_mov_epi64(__M, __W); + _mm512_mask_reduce_operator(|); +} +#undef _mm512_mask_reduce_operator + +#define _mm512_mask_reduce_operator(op) \ + __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \ + __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \ + __m256d __t3 = __t1 op __t2; \ + __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ + __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ + __m128d __t6 = __t4 op __t5; \ + __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ + __m128d __t8 = __t6 op __t7; \ + return __t8[0]; + +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { + _mm512_mask_reduce_operator(+); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) { + _mm512_mask_reduce_operator(*); +} + +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { - _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M, - f, d, pd); + __W = _mm512_maskz_mov_pd(__M, __W); + _mm512_mask_reduce_operator(+); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { - _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M, - f, d, pd); -} - -// Vec512 - Vector with 
size 512. -// Operator - Can be one of following: +,*,&,| -// T2 - Can get 'i' for int and ' ' for packed single. -// T1 - Can get 'i' for int and 'f' for float. - -#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \ - __m256##T1 Vec256 = \ - (__m256##T1)(__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 0, 1, 2, 3, 4, 5, 6, 7) \ - Operator \ - __builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 8, 9, 10, 11, 12, 13, 14, 15)); \ - __m128##T1 Vec128 = \ - (__m128##T1)(__builtin_shufflevector( \ - (__v8s##T2)Vec256, \ - (__v8s##T2)Vec256, \ - 0, 1, 2, 3) \ - Operator \ - __builtin_shufflevector( \ - (__v8s##T2)Vec256, \ - (__v8s##T2)Vec256, \ - 4, 5, 6, 7)); \ - Vec128 = (__m128##T1)(__builtin_shufflevector( \ - (__v4s##T2)Vec128, \ - (__v4s##T2)Vec128, \ - 0, 1, -1, -1) \ - Operator \ - __builtin_shufflevector( \ - (__v4s##T2)Vec128, \ - (__v4s##T2)Vec128, \ - 2, 3, -1, -1)); \ - Vec128 = (__m128##T1)(__builtin_shufflevector( \ - (__v4s##T2)Vec128, \ - (__v4s##T2)Vec128, \ - 0, -1, -1, -1) \ - Operator \ - __builtin_shufflevector( \ - (__v4s##T2)Vec128, \ - (__v4s##T2)Vec128, \ - 1, -1, -1, -1)); \ - return Vec128[0]; \ - }) - -static __inline__ int __DEFAULT_FN_ATTRS + __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W); + _mm512_mask_reduce_operator(*); +} +#undef _mm512_mask_reduce_operator + +#define _mm512_mask_reduce_operator(op) \ + __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \ + __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \ + __m256i __t3 = (__m256i)(__t1 op __t2); \ + __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \ + __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \ + __v4su __t6 = __t4 op __t5; \ + __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ + __v4su __t8 = __t6 op __t7; \ + __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ + __v4su __t10 = __t8 op __t9; \ + return __t10[0]; + +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W) { - _mm512_reduce_operator_32bit(__W, +, i, i); + _mm512_mask_reduce_operator(+); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W) { - _mm512_reduce_operator_32bit(__W, *, i, i); + _mm512_mask_reduce_operator(*); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W) { - _mm512_reduce_operator_32bit(__W, &, i, i); + _mm512_mask_reduce_operator(&); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W) { - _mm512_reduce_operator_32bit(__W, |, i, i); -} - -static __inline__ float __DEFAULT_FN_ATTRS -_mm512_reduce_add_ps(__m512 __W) { - _mm512_reduce_operator_32bit(__W, +, f, ); + _mm512_mask_reduce_operator(|); } -static __inline__ float __DEFAULT_FN_ATTRS -_mm512_reduce_mul_ps(__m512 __W) { - _mm512_reduce_operator_32bit(__W, *, f, ); -} - -// Vec512 - Vector with size 512. -// Vec512Neutral - All vector elements set to the identity element. -// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0} -// Operator - Can be one of following: +,*,&,| -// Mask - Intrinsic Mask -// T2 - Can get 'i' for int and 'f' for float. -// T1 - Can get 'i' for int and 'd' for double. -// T3 - Can be Ps for packed single or d for d-word. 
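In both the old macro below and the new code, a masked reduction is a two-step affair: masked-off lanes are first overwritten with the operation's identity element from the table above (0 for +, 1 for *, all-ones for &, 0 for |), and only then does the unmasked reduction run, so dead lanes cannot affect the result. A short usage sketch with the public intrinsic (this assumes AVX-512F hardware and `-mavx512f`; the data and mask values are arbitrary examples):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* Lane 0 holds 1, lane 15 holds 16 (_mm512_set_epi32 lists e15 first). */
  __m512i v = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9,
                               8, 7, 6, 5, 4, 3, 2, 1);
  __mmask16 m = 0x00FF; /* keep lanes 0-7; lanes 8-15 become the identity 0 */
  int sum = _mm512_mask_reduce_add_epi32(m, v);
  printf("%d\n", sum);  /* 1 + 2 + ... + 8 = 36 */
  return 0;
}
```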
- -#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, \ - Mask, T2, T1, T3) \ - __extension__({ \ - Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ - (__mmask16)Mask, \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512Neutral); \ - _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1); \ - }) - -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { - _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d); + __W = _mm512_maskz_mov_epi32(__M, __W); + _mm512_mask_reduce_operator(+); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { - _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d); + __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W); + _mm512_mask_reduce_operator(*); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { - _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M, - i, i, d); + __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W); + _mm512_mask_reduce_operator(&); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { - _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d); + __W = _mm512_maskz_mov_epi32(__M, __W); + _mm512_mask_reduce_operator(|); +} +#undef _mm512_mask_reduce_operator + +#define _mm512_mask_reduce_operator(op) \ + __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \ + __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \ + __m256 __t3 = __t1 op __t2; \ + __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ + __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ + __m128 __t6 = __t4 op __t5; \ + __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ + __m128 __t8 = __t6 op __t7; \ + __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ + __m128 __t10 = __t8 op __t9; \ + return __t10[0]; + +static __inline__ float __DEFAULT_FN_ATTRS512 +_mm512_reduce_add_ps(__m512 __W) { + _mm512_mask_reduce_operator(+); +} + +static __inline__ float __DEFAULT_FN_ATTRS512 +_mm512_reduce_mul_ps(__m512 __W) { + _mm512_mask_reduce_operator(*); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { - _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps); + __W = _mm512_maskz_mov_ps(__M, __W); + _mm512_mask_reduce_operator(+); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { - _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps); -} - -// Used bisection method. At each step, we partition the vector with previous -// step in half, and the operation is performed on its two halves. -// This takes log2(n) steps where n is the number of elements in the vector. -// This macro uses only intrinsics from the AVX512F feature. - -// Vec512 - Vector with size of 512. -// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example: -// __mm512_max_epi64 -// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}] -// T2 - Can get 'i' for int and 'f' for float. 
[__v8d{i|f}] - -#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \ - Vec512 = _mm512_##IntrinName( \ - (__m512##T1)__builtin_shufflevector( \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512, \ - 0, 1, 2, 3, -1, -1, -1, -1), \ - (__m512##T1)__builtin_shufflevector( \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512, \ - 4, 5, 6, 7, -1, -1, -1, -1)); \ - Vec512 = _mm512_##IntrinName( \ - (__m512##T1)__builtin_shufflevector( \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512, \ - 0, 1, -1, -1, -1, -1, -1, -1),\ - (__m512##T1)__builtin_shufflevector( \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512, \ - 2, 3, -1, -1, -1, -1, -1, \ - -1)); \ - Vec512 = _mm512_##IntrinName( \ - (__m512##T1)__builtin_shufflevector( \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512, \ - 0, -1, -1, -1, -1, -1, -1, -1),\ - (__m512##T1)__builtin_shufflevector( \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512, \ - 1, -1, -1, -1, -1, -1, -1, -1))\ - ; \ - return Vec512[0]; \ - }) - -static __inline__ long long __DEFAULT_FN_ATTRS -_mm512_reduce_max_epi64(__m512i __V) { - _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i); + __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W); + _mm512_mask_reduce_operator(*); } +#undef _mm512_mask_reduce_operator -static __inline__ unsigned long long __DEFAULT_FN_ATTRS -_mm512_reduce_max_epu64(__m512i __V) { - _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i); +#define _mm512_mask_reduce_operator(op) \ + __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \ + __m512i __t2 = _mm512_##op(__V, __t1); \ + __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \ + __m512i __t4 = _mm512_##op(__t2, __t3); \ + __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \ + __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ + return __t6[0]; + +static __inline__ long long __DEFAULT_FN_ATTRS512 +_mm512_reduce_max_epi64(__m512i __V) { + _mm512_mask_reduce_operator(max_epi64); } -static __inline__ double __DEFAULT_FN_ATTRS -_mm512_reduce_max_pd(__m512d __V) { - _mm512_reduce_maxMin_64bit(__V, max_pd, d, f); +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 +_mm512_reduce_max_epu64(__m512i __V) { + _mm512_mask_reduce_operator(max_epu64); } -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64 -(__m512i __V) { - _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i); +static __inline__ long long __DEFAULT_FN_ATTRS512 +_mm512_reduce_min_epi64(__m512i __V) { + _mm512_mask_reduce_operator(min_epi64); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V) { - _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i); + _mm512_mask_reduce_operator(min_epu64); } -static __inline__ double __DEFAULT_FN_ATTRS -_mm512_reduce_min_pd(__m512d __V) { - _mm512_reduce_maxMin_64bit(__V, min_pd, d, f); -} - -// Vec512 - Vector with size 512. -// Vec512Neutral - A 512 length vector with elements set to the identity element -// Identity element: {max_epi,0x8000000000000000} -// {max_epu,0x0000000000000000} -// {max_pd, 0xFFF0000000000000} -// {min_epi,0x7FFFFFFFFFFFFFFF} -// {min_epu,0xFFFFFFFFFFFFFFFF} -// {min_pd, 0x7FF0000000000000} -// -// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example: -// __mm512_max_epi64 -// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}] -// T2 - Can get 'i' for int and 'f' for float. 
[__v8d{i|f}] -// T3 - Can get 'q' q word and 'pd' for packed double. -// [__builtin_ia32_select{q|pd}_512] -// Mask - Intrinsic Mask - -#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \ - T2, T3, Mask) \ - __extension__({ \ - Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ - (__mmask8)Mask, \ - (__v8d##T2)Vec512, \ - (__v8d##T2)Vec512Neutral); \ - _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2); \ - }) - -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { - _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000), - max_epi64, i, i, q, __M); + __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V); + _mm512_mask_reduce_operator(max_epi64); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { - _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000), - max_epu64, i, i, q, __M); -} - -static __inline__ double __DEFAULT_FN_ATTRS -_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { - _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()), - max_pd, d, f, pd, __M); + __V = _mm512_maskz_mov_epi64(__M, __V); + _mm512_mask_reduce_operator(max_epu64); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) { - _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), - min_epi64, i, i, q, __M); + __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V); + _mm512_mask_reduce_operator(min_epi64); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { - _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), - min_epu64, i, i, q, __M); + __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V); + _mm512_mask_reduce_operator(min_epu64); } +#undef _mm512_mask_reduce_operator -static __inline__ double __DEFAULT_FN_ATTRS -_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { - _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()), - min_pd, d, f, pd, __M); -} - -// Vec512 - Vector with size 512. 
-// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: -// __mm512_max_epi32 -// T1 - Can get 'i' for int and ' ' .[__m512{i|}] -// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}] - -#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \ - Vec512 = _mm512_##IntrinName( \ - (__m512##T1)__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 0, 1, 2, 3, 4, 5, 6, 7, \ - -1, -1, -1, -1, -1, -1, -1, -1), \ - (__m512##T1)__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 8, 9, 10, 11, 12, 13, 14, 15, \ - -1, -1, -1, -1, -1, -1, -1, -1)); \ - Vec512 = _mm512_##IntrinName( \ - (__m512##T1)__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 0, 1, 2, 3, -1, -1, -1, -1, \ - -1, -1, -1, -1, -1, -1, -1, -1), \ - (__m512##T1)__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 4, 5, 6, 7, -1, -1, -1, -1, \ - -1, -1, -1, -1, -1, -1, -1, -1)); \ - Vec512 = _mm512_##IntrinName( \ - (__m512##T1)__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 0, 1, -1, -1, -1, -1, -1, -1, \ - -1, -1, -1, -1, -1, -1, -1, -1), \ - (__m512##T1)__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 2, 3, -1, -1, -1, -1, -1, -1, \ - -1, -1, -1, -1, -1, -1, -1, -1)); \ - Vec512 = _mm512_##IntrinName( \ - (__m512##T1)__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 0, -1, -1, -1, -1, -1, -1, -1, \ - -1, -1, -1, -1, -1, -1, -1, -1), \ - (__m512##T1)__builtin_shufflevector( \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512, \ - 1, -1, -1, -1, -1, -1, -1, -1, \ - -1, -1, -1, -1, -1, -1, -1, -1)); \ - return Vec512[0]; \ - }) - -static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) { - _mm512_reduce_maxMin_32bit(a, max_epi32, i, i); -} +#define _mm512_mask_reduce_operator(op) \ + __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \ + __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \ + __m256i __t3 = _mm256_##op(__t1, __t2); \ + __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \ + __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \ + __m128i __t6 = _mm_##op(__t4, __t5); \ + __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \ + __m128i __t8 = _mm_##op(__t6, __t7); \ + __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \ + __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \ + return __t10[0]; -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mm512_reduce_max_epu32(__m512i a) { - _mm512_reduce_maxMin_32bit(a, max_epu32, i, i); +static __inline__ int __DEFAULT_FN_ATTRS512 +_mm512_reduce_max_epi32(__m512i __V) { + _mm512_mask_reduce_operator(max_epi32); } -static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) { - _mm512_reduce_maxMin_32bit(a, max_ps, , f); +static __inline__ unsigned int __DEFAULT_FN_ATTRS512 +_mm512_reduce_max_epu32(__m512i __V) { + _mm512_mask_reduce_operator(max_epu32); } -static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) { - _mm512_reduce_maxMin_32bit(a, min_epi32, i, i); +static __inline__ int __DEFAULT_FN_ATTRS512 +_mm512_reduce_min_epi32(__m512i __V) { + _mm512_mask_reduce_operator(min_epi32); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mm512_reduce_min_epu32(__m512i a) { - _mm512_reduce_maxMin_32bit(a, min_epu32, i, i); -} - -static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) { - _mm512_reduce_maxMin_32bit(a, 
min_ps, , f); -} - -// Vec512 - Vector with size 512. -// Vec512Neutral - A 512 length vector with elements set to the identity element -// Identity element: {max_epi,0x80000000} -// {max_epu,0x00000000} -// {max_ps, 0xFF800000} -// {min_epi,0x7FFFFFFF} -// {min_epu,0xFFFFFFFF} -// {min_ps, 0x7F800000} -// -// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: -// __mm512_max_epi32 -// T1 - Can get 'i' for int and ' ' .[__m512{i|}] -// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}] -// T3 - Can get 'q' q word and 'pd' for packed double. -// [__builtin_ia32_select{q|pd}_512] -// Mask - Intrinsic Mask - -#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \ - T2, T3, Mask) \ - __extension__({ \ - Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ - (__mmask16)Mask, \ - (__v16s##T2)Vec512, \ - (__v16s##T2)Vec512Neutral); \ - _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2); \ - }) +static __inline__ unsigned int __DEFAULT_FN_ATTRS512 +_mm512_reduce_min_epu32(__m512i __V) { + _mm512_mask_reduce_operator(min_epu32); +} -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { - _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32, - i, i, d, __M); + __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V); + _mm512_mask_reduce_operator(max_epi32); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { - _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32, - i, i, d, __M); -} - -static __inline__ float __DEFAULT_FN_ATTRS -_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { - _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f, - ps, __M); + __V = _mm512_maskz_mov_epi32(__M, __V); + _mm512_mask_reduce_operator(max_epu32); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { - _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32, - i, i, d, __M); + __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V); + _mm512_mask_reduce_operator(min_epi32); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { - _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32, - i, i, d, __M); + __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V); + _mm512_mask_reduce_operator(min_epu32); +} +#undef _mm512_mask_reduce_operator + +#define _mm512_mask_reduce_operator(op) \ + __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \ + __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \ + __m256d __t3 = _mm256_##op(__t1, __t2); \ + __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ + __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ + __m128d __t6 = _mm_##op(__t4, __t5); \ + __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ + __m128d __t8 = _mm_##op(__t6, __t7); \ + return __t8[0]; + +static __inline__ double __DEFAULT_FN_ATTRS512 +_mm512_reduce_max_pd(__m512d __V) { + _mm512_mask_reduce_operator(max_pd); +} + +static __inline__ double __DEFAULT_FN_ATTRS512 +_mm512_reduce_min_pd(__m512d __V) { + _mm512_mask_reduce_operator(min_pd); +} + +static __inline__ double __DEFAULT_FN_ATTRS512 
+_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { + __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V); + _mm512_mask_reduce_operator(max_pd); +} + +static __inline__ double __DEFAULT_FN_ATTRS512 +_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { + __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V); + _mm512_mask_reduce_operator(min_pd); +} +#undef _mm512_mask_reduce_operator + +#define _mm512_mask_reduce_operator(op) \ + __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \ + __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \ + __m256 __t3 = _mm256_##op(__t1, __t2); \ + __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ + __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ + __m128 __t6 = _mm_##op(__t4, __t5); \ + __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ + __m128 __t8 = _mm_##op(__t6, __t7); \ + __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ + __m128 __t10 = _mm_##op(__t8, __t9); \ + return __t10[0]; + +static __inline__ float __DEFAULT_FN_ATTRS512 +_mm512_reduce_max_ps(__m512 __V) { + _mm512_mask_reduce_operator(max_ps); +} + +static __inline__ float __DEFAULT_FN_ATTRS512 +_mm512_reduce_min_ps(__m512 __V) { + _mm512_mask_reduce_operator(min_ps); +} + +static __inline__ float __DEFAULT_FN_ATTRS512 +_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { + __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V); + _mm512_mask_reduce_operator(max_ps); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { - _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f, - ps, __M); + __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V); + _mm512_mask_reduce_operator(min_ps); } +#undef _mm512_mask_reduce_operator +#undef __DEFAULT_FN_ATTRS512 +#undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS -#endif // __AVX512FINTRIN_H +#endif /* __AVX512FINTRIN_H */ diff --git a/lib/clang/7.0.0/include/avx512ifmaintrin.h b/lib/clang/8.0.0/include/avx512ifmaintrin.h similarity index 66% rename from lib/clang/7.0.0/include/avx512ifmaintrin.h rename to lib/clang/8.0.0/include/avx512ifmaintrin.h index 5defbae..1597130 100644 --- a/lib/clang/7.0.0/include/avx512ifmaintrin.h +++ b/lib/clang/8.0.0/include/avx512ifmaintrin.h @@ -29,62 +29,52 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) { - return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) __Z, - (__mmask8) -1); + return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y, + (__v8di) __Z); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, - __m512i __Y) +_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W, - (__v8di) __X, - (__v8di) __Y, - (__mmask8) __M); + return (__m512i)__builtin_ia32_selectq_512(__M, + (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { - return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X, - (__v8di) __Y, - (__v8di) __Z, - (__mmask8) __M); + return (__m512i)__builtin_ia32_selectq_512(__M, + (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) { - return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) __Z, - (__mmask8) -1); + return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y, + (__v8di) __Z); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, - __m512i __Y) +_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W, - (__v8di) __X, - (__v8di) __Y, - (__mmask8) __M); + return (__m512i)__builtin_ia32_selectq_512(__M, + (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { - return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X, - (__v8di) __Y, - (__v8di) __Z, - (__mmask8) __M); + return (__m512i)__builtin_ia32_selectq_512(__M, + (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS diff --git a/lib/clang/8.0.0/include/avx512ifmavlintrin.h b/lib/clang/8.0.0/include/avx512ifmavlintrin.h new file mode 100644 index 0000000..afdea88 --- /dev/null +++ b/lib/clang/8.0.0/include/avx512ifmavlintrin.h @@ -0,0 +1,133 @@ +/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __IFMAVLINTRIN_H +#define __IFMAVLINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256))) + + + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y, + (__v2di) __Z); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_selectq_128(__M, + (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), + (__v2di)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i)__builtin_ia32_selectq_128(__M, + (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, + (__v4di)__Z); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_selectq_256(__M, + (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), + (__v4di)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i)__builtin_ia32_selectq_256(__M, + (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, + (__v2di)__Z); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_selectq_128(__M, + (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), + (__v2di)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i)__builtin_ia32_selectq_128(__M, + (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, + (__v4di)__Z); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_madd52lo_epu64 (__m256i __W,
__mmask8 __M, __m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_selectq_256(__M, + (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), + (__v4di)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i)__builtin_ia32_selectq_256(__M, + (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); +} + + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif diff --git a/lib/clang/7.0.0/include/avx512pfintrin.h b/lib/clang/8.0.0/include/avx512pfintrin.h similarity index 78% rename from lib/clang/7.0.0/include/avx512pfintrin.h rename to lib/clang/8.0.0/include/avx512pfintrin.h index c7fa3cf..5b8260b 100644 --- a/lib/clang/7.0.0/include/avx512pfintrin.h +++ b/lib/clang/8.0.0/include/avx512pfintrin.h @@ -1,4 +1,4 @@ -/*===------------- avx512pfintrin.h - PF intrinsics ------------------=== +/*===------------- avx512pfintrin.h - PF intrinsics ------------------------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -31,80 +31,80 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf"))) -#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (long long const *)(addr), (int)(scale), \ - (int)(hint)); }) - -#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) __extension__ ({\ + (int)(hint)) + +#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \ __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \ (long long const *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) ({\ +#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfdps((__mmask16)(mask), \ (__v16si)(__m512i)(index), (int const *)(addr), \ - (int)(scale), (int)(hint)); }) + (int)(scale), (int)(hint)) -#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) ({\ +#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \ __builtin_ia32_gatherpfdps((__mmask16) -1, \ (__v16si)(__m512i)(index), (int const *)(addr), \ - (int)(scale), (int)(hint)); }) + (int)(scale), (int)(hint)) -#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ (long long const *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \ __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \ (long long const *)(addr), (int)(scale), \ - (int)(hint)); }) - -#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) ({\ + (int)(hint)) + +#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ - (int const *)(addr), (int)(scale), (int)(hint)); }) + (int const *)(addr), (int)(scale), (int)(hint)) -#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) ({\ +#define 
_mm512_prefetch_i64gather_ps(index, addr, scale, hint) \ __builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \ - (int const *)(addr), (int)(scale), (int)(hint)); }) + (int const *)(addr), (int)(scale), (int)(hint)) -#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \ __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \ (long long *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (long long *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \ __builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \ - (int *)(addr), (int)(scale), (int)(hint)); }) + (int *)(addr), (int)(scale), (int)(hint)) -#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfdps((__mmask16)(mask), \ (__v16si)(__m512i)(index), (int *)(addr), \ - (int)(scale), (int)(hint)); }) + (int)(scale), (int)(hint)) -#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \ __builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \ (long long *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ (long long *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \ __builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \ - (int *)(addr), (int)(scale), (int)(hint)); }) + (int *)(addr), (int)(scale), (int)(hint)) -#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ - (int *)(addr), (int)(scale), (int)(hint)); }) + (int *)(addr), (int)(scale), (int)(hint)) #undef __DEFAULT_FN_ATTRS diff --git a/lib/clang/7.0.0/include/avx512vbmi2intrin.h b/lib/clang/8.0.0/include/avx512vbmi2intrin.h similarity index 72% rename from lib/clang/7.0.0/include/avx512vbmi2intrin.h rename to lib/clang/8.0.0/include/avx512vbmi2intrin.h index 43e97b4..d2a5809 100644 --- a/lib/clang/7.0.0/include/avx512vbmi2intrin.h +++ b/lib/clang/8.0.0/include/avx512vbmi2intrin.h @@ -29,7 +29,7 @@ #define __AVX512VBMI2INTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -44,7 +44,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), __U); } @@ -60,7 +60,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), __U); } @@ -90,7 +90,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), __U); } @@ -106,7 +106,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), __U); } @@ -122,7 +122,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), __U); } @@ -138,87 +138,93 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), __U); } -#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ - (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \ - (__v8di)(B), \ - (int)(I), \ - (__v8di)(S), \ - (__mmask8)(U)); }) +#define _mm512_shldi_epi64(A, B, I) \ + (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I)) + +#define _mm512_mask_shldi_epi64(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ + (__v8di)(__m512i)(S)) #define _mm512_maskz_shldi_epi64(U, A, B, I) \ - _mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ + (__v8di)_mm512_setzero_si512()) -#define _mm512_shldi_epi64(A, B, I) \ - _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) +#define _mm512_shldi_epi32(A, B, I) \ + (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I)) -#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ - (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \ - (__v16si)(B), \ - (int)(I), \ - (__v16si)(S), \ - (__mmask16)(U)); }) +#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ + (__v16si)(__m512i)(S)) #define _mm512_maskz_shldi_epi32(U, A, B, I) \ - _mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ + (__v16si)_mm512_setzero_si512()) -#define 
_mm512_shldi_epi32(A, B, I) \ - _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) +#define _mm512_shldi_epi16(A, B, I) \ + (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ + (__v32hi)(__m512i)(B), (int)(I)) -#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ - (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \ - (__v32hi)(B), \ - (int)(I), \ - (__v32hi)(S), \ - (__mmask32)(U)); }) +#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ + (__v32hi)(__m512i)(S)) #define _mm512_maskz_shldi_epi16(U, A, B, I) \ - _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ + (__v32hi)_mm512_setzero_si512()) -#define _mm512_shldi_epi16(A, B, I) \ - _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) +#define _mm512_shrdi_epi64(A, B, I) \ + (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I)) -#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ - (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \ - (__v8di)(B), \ - (int)(I), \ - (__v8di)(S), \ - (__mmask8)(U)); }) +#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ + (__v8di)(__m512i)(S)) #define _mm512_maskz_shrdi_epi64(U, A, B, I) \ - _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ + (__v8di)_mm512_setzero_si512()) -#define _mm512_shrdi_epi64(A, B, I) \ - _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) +#define _mm512_shrdi_epi32(A, B, I) \ + (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I)) -#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ - (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \ - (__v16si)(B), \ - (int)(I), \ - (__v16si)(S), \ - (__mmask16)(U)); }) +#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ + (__v16si)(__m512i)(S)) #define _mm512_maskz_shrdi_epi32(U, A, B, I) \ - _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ + (__v16si)_mm512_setzero_si512()) -#define _mm512_shrdi_epi32(A, B, I) \ - _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) +#define _mm512_shrdi_epi16(A, B, I) \ + (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ + (__v32hi)(__m512i)(B), (int)(I)) -#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ - (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \ - (__v32hi)(B), \ - (int)(I), \ - (__v32hi)(S), \ - (__mmask32)(U)); }) +#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ + (__v32hi)(__m512i)(S)) #define _mm512_maskz_shrdi_epi16(U, A, B, I) \ - _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) - -#define _mm512_shrdi_epi16(A, B, I) \ - _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + 
(__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ + (__v32hi)_mm512_setzero_si512()) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) diff --git a/lib/clang/7.0.0/include/avx512vbmiintrin.h b/lib/clang/8.0.0/include/avx512vbmiintrin.h similarity index 64% rename from lib/clang/7.0.0/include/avx512vbmiintrin.h rename to lib/clang/8.0.0/include/avx512vbmiintrin.h index 837238e..b6e93c2 100644 --- a/lib/clang/7.0.0/include/avx512vbmiintrin.h +++ b/lib/clang/8.0.0/include/avx512vbmiintrin.h @@ -29,79 +29,65 @@ #define __VBMIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I, - __mmask64 __U, __m512i __B) +_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) { - return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A, - (__v64qi) __I - /* idx */ , - (__v64qi) __B, - (__mmask64) __U); + return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I, + (__v64qi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B) +_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I - /* idx */ , - (__v64qi) __A, - (__v64qi) __B, - (__mmask64) -1); + return (__m512i)__builtin_ia32_selectb_512(__U, + (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), + (__v64qi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U, - __m512i __I, __m512i __B) +_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I - /* idx */ , - (__v64qi) __A, - (__v64qi) __B, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512(__U, + (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), + (__v64qi)__I); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A, - __m512i __I, __m512i __B) +_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I, + __m512i __B) { - return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I - /* idx */ , - (__v64qi) __A, - (__v64qi) __B, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512(__U, + (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, - (__v64qi) __A, - (__v64qi) _mm512_undefined_epi32 (), - (__mmask64) -1); + return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, - (__v64qi) __A, - (__v64qi) _mm512_setzero_si512(), - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_permutexvar_epi8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS 
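/* [Editor's note: the permutexvar/permutex2var hunks around this point show the
   other recurring clang-8 rewrite. Masked intrinsics no longer call a dedicated
   _mask builtin; they compute the unmasked result and blend it per lane with a
   select builtin such as __builtin_ia32_selectb_512. A scalar model of that
   select semantics, as a sketch only (the model function and its names are
   hypothetical, not part of the header):

       // Lane i of the output takes the fresh result when mask bit i is set
       // and the merge source otherwise; mask variants pass the old
       // destination as the merge source, maskz variants pass zeros.
       static inline void select_epi8_model(unsigned long long mask,
                                            const signed char *result,
                                            const signed char *merge_src,
                                            signed char *out, int lanes) {
           for (int i = 0; i < lanes; ++i)
               out[i] = ((mask >> i) & 1) ? result[i] : merge_src[i];
       }
] */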
_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, - (__v64qi) __A, - (__v64qi) __W, - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_permutexvar_epi8(__A, __B), + (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/clang/8.0.0/include/avx512vbmivlintrin.h b/lib/clang/8.0.0/include/avx512vbmivlintrin.h new file mode 100644 index 0000000..9a0400b --- /dev/null +++ b/lib/clang/8.0.0/include/avx512vbmivlintrin.h @@ -0,0 +1,214 @@ +/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __VBMIVLINTRIN_H +#define __VBMIVLINTRIN_H + +/* Define the default attributes for the functions in this file. 
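[Editor's note: the two attribute #define lines immediately below also show the new __min_vector_width__(N) attribute that clang 8 threads through all of these headers. As the clang documentation describes it, the attribute records the minimum vector register width a function needs, so that under options such as -mprefer-vector-width=256 the backend still emits full-width code for functions built from wider intrinsics. The VL headers accordingly split the old single __DEFAULT_FN_ATTRS macro into separate 128-bit and 256-bit variants, letting each intrinsic advertise only the width it actually requires; the macros that follow are the example.]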
*/ +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256))) + + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, + (__v16qi)__I, + (__v16qi)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, + __m128i __B) +{ + return (__m128i)__builtin_ia32_selectb_128(__U, + (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), + (__v16qi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, + __m128i __B) +{ + return (__m128i)__builtin_ia32_selectb_128(__U, + (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), + (__v16qi)__I); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, + __m128i __B) +{ + return (__m128i)__builtin_ia32_selectb_128(__U, + (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), + (__v16qi)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, + (__v32qi)__B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, + __m256i __B) +{ + return (__m256i)__builtin_ia32_selectb_256(__U, + (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), + (__v32qi)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, + __m256i __B) +{ + return (__m256i)__builtin_ia32_selectb_256(__U, + (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), + (__v32qi)__I); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, + __m256i __B) +{ + return (__m256i)__builtin_ia32_selectb_256(__U, + (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), + (__v32qi)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_permutexvar_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, + (__v16qi)_mm_permutexvar_epi8(__A, __B), + (__v16qi)_mm_setzero_si128()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, + (__v16qi)_mm_permutexvar_epi8(__A, __B), + (__v16qi)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, + (__v32qi)_mm256_permutexvar_epi8(__A, __B), + (__v32qi)_mm256_setzero_si256()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 
+_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, + (__v32qi)_mm256_permutexvar_epi8(__A, __B), + (__v32qi)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, + (__v16qi) __Y, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, + (__v16qi) __Y, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, + (__v16qi) __Y, + (__v16qi) + _mm_undefined_si128 (), + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) +{ + return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, + (__v32qi) __Y, + (__v32qi) __W, + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y) +{ + return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, + (__v32qi) __Y, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y) +{ + return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, + (__v32qi) __Y, + (__v32qi) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif diff --git a/lib/clang/7.0.0/include/avx512vlbitalgintrin.h b/lib/clang/8.0.0/include/avx512vlbitalgintrin.h similarity index 60% rename from lib/clang/7.0.0/include/avx512vlbitalgintrin.h rename to lib/clang/8.0.0/include/avx512vlbitalgintrin.h index 76eb877..64860b2 100644 --- a/lib/clang/7.0.0/include/avx512vlbitalgintrin.h +++ b/lib/clang/8.0.0/include/avx512vlbitalgintrin.h @@ -1,4 +1,4 @@ -/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------=== +/*===---- avx512vlbitalgintrin.h - BITALG intrinsics -----------------------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -29,15 +29,16 @@ #define __AVX512VLBITALGINTRIN_H /* Define the default attributes for the functions in this file. 
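[Editor's note: besides the attribute split, this file's hunks fix naming bugs in the clang-7 header. The nonstandard _mm128_ prefix becomes the _mm_ prefix that Intel's intrinsics guide documents (for example _mm_popcnt_epi16), and the bit-shuffle intrinsics are corrected from the epi32/epi16 spellings to the documented _bitshuffle_epi64_mask names. Code written against the clang-7 spellings needs a mechanical rename, roughly (a hypothetical call site):

    __mmask16 k7 = _mm128_bitshuffle_epi16_mask(a, b);  // compiles only with the clang 7 header
    __mmask16 k8 = _mm_bitshuffle_epi64_mask(a, b);     // clang 8 header, matches the Intel name
]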
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi16(__m256i __A) { return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, @@ -45,7 +46,7 @@ _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) (__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), @@ -53,35 +54,35 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_popcnt_epi16(__m128i __A) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_popcnt_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, - (__v8hi) _mm128_popcnt_epi16(__B), + (__v8hi) _mm_popcnt_epi16(__B), (__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { - return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), + return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), __U, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi8(__m256i __A) { return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, @@ -89,7 +90,7 @@ _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) (__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), @@ -97,61 +98,62 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_popcnt_epi8(__m128i __A) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_popcnt_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, - (__v16qi) _mm128_popcnt_epi8(__B), + (__v16qi) _mm_popcnt_epi8(__B), 
(__v16qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { - return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), + return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), __U, __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 +_mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B) { return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, (__v32qi) __B, __U); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 +_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) { - return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1, + return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1, __A, __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 +_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) { return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, (__v16qi) __B, __U); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 +_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) { - return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1, + return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1, __A, __B); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif diff --git a/lib/clang/7.0.0/include/avx512vlbwintrin.h b/lib/clang/8.0.0/include/avx512vlbwintrin.h similarity index 78% rename from lib/clang/7.0.0/include/avx512vlbwintrin.h rename to lib/clang/8.0.0/include/avx512vlbwintrin.h index e940e2b..1b038dd 100644 --- a/lib/clang/7.0.0/include/avx512vlbwintrin.h +++ b/lib/clang/8.0.0/include/avx512vlbwintrin.h @@ -29,94 +29,90 @@ #define __AVX512VLBWINTRIN_H /* Define the default attributes for the functions in this file. 
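[Editor's note: three things to watch in this file's hunks. First, the private _mm_setzero_hi helper is deleted in favor of the standard _mm_setzero_si128. Second, the cmp macro family loses its statement-expression wrappers, as elsewhere. Third, further down, the signatures of _mm_mask_packs_epi32 and _mm_mask_packus_epi32 are corrected from __mmask16 to __mmask8: packing two 4-lane epi32 vectors yields only eight epi16 lanes, so an 8-bit mask covers every lane, and the select builtin was already called with a (__mmask8) cast. Only the low 8 mask bits were ever consulted, so correct callers are unaffected by the narrowing.]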
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"))) - -static __inline __m128i __DEFAULT_FN_ATTRS -_mm_setzero_hi(void){ - return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }; -} +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256))) /* Integer compare */ -#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epi8_mask(a, b, p) \ (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epi8_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epu8_mask(a, b, p) \ (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epu8_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epi8_mask(a, b, p) \ (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)(m)); }) + (__mmask32)(m)) -#define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epu8_mask(a, b, p) \ (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)(m)); }) + (__mmask32)(m)) -#define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epi16_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epi16_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epu16_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epu16_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epi16_mask(a, b, p) \ 
(__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epu16_mask(a, b, p) \ (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) #define _mm_cmpeq_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) @@ -318,147 +314,147 @@ _mm_setzero_hi(void){ #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi8(__m128i __W, 
__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -466,7 +462,7 @@ _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i 
__W) (__v16qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -474,7 +470,7 @@ _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) (__v32qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -482,7 +478,7 @@ _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) (__v8hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -490,7 +486,7 @@ _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) (__v16hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -498,7 +494,7 @@ _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -506,7 +502,7 @@ _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -514,7 +510,7 @@ _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -522,7 +518,7 @@ _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -530,7 +526,7 @@ _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -538,7 +534,7 @@ _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -546,7 +542,7 @@ _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -554,22 +550,22 @@ _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packs_epi32(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packs_epi32(__A, __B), (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -577,7 +573,7 @@ _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -585,7 +581,7 @@ _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -593,7 +589,7 @@ _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -601,7 +597,7 @@ _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -609,7 +605,7 @@ _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -617,7 +613,7 @@ _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -625,15 +621,15 @@ _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_packus_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packus_epi32(__A, __B), (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -641,7 +637,7 @@ _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -649,7 +645,7 @@ _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -657,7 +653,7 @@ _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -665,7 +661,7 @@ _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -673,7 +669,7 @@ _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -681,7 +677,7 @@ _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -689,7 +685,7 @@ _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -697,7 +693,7 @@ _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -705,7 +701,7 @@ _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -713,7 +709,7 @@ _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -721,7 +717,7 @@ _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, 
__m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -729,7 +725,7 @@ _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -737,7 +733,7 @@ _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -745,7 +741,7 @@ _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -753,7 +749,7 @@ _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -761,7 +757,7 @@ _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -769,7 +765,7 @@ _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -777,7 +773,7 @@ _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -785,7 +781,7 @@ _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -793,7 +789,7 @@ _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -801,7 +797,7 @@ _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -809,7 +805,7 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -817,7 +813,7 @@ _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -825,7 +821,7 @@ _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -833,7 +829,7 @@ _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -841,7 +837,7 @@ _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -849,7 +845,7 @@ _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -857,7 +853,7 @@ _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -865,7 +861,7 @@ _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -873,7 +869,7 @@ _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -881,7 +877,7 @@ _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -889,7 +885,7 @@ _mm_mask_max_epi8(__m128i __W, 
__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -897,7 +893,7 @@ _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -905,7 +901,7 @@ _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -913,7 +909,7 @@ _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -921,7 +917,7 @@ _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -929,7 +925,7 @@ _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -937,7 +933,7 @@ _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -945,7 +941,7 @@ _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -953,7 +949,7 @@ _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -961,7 +957,7 @@ _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -969,7 +965,7 @@ _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -977,7 +973,7 @@ _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -985,7 +981,7 @@ _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -993,7 +989,7 @@ _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1001,7 +997,7 @@ _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1009,7 +1005,7 @@ _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1017,7 +1013,7 @@ _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1025,7 +1021,7 @@ _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1033,7 +1029,7 @@ _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1041,7 +1037,7 @@ _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1049,7 +1045,7 @@ _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1057,7 +1053,7 @@ _mm256_maskz_min_epi16(__mmask16 __M, 
__m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1065,7 +1061,7 @@ _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1073,7 +1069,7 @@ _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1081,7 +1077,7 @@ _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1089,7 +1085,7 @@ _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1097,7 +1093,7 @@ _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1105,7 +1101,7 @@ _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1113,7 +1109,7 @@ _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1121,7 +1117,7 @@ _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1129,7 +1125,7 @@ _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1137,7 +1133,7 @@ _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1145,7 +1141,7 @@ _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1153,7 +1149,7 @@ _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1161,7 +1157,7 @@ _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1169,7 +1165,7 @@ _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1177,7 +1173,7 @@ _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1185,7 +1181,7 @@ _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1193,7 +1189,7 @@ _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1201,7 +1197,7 @@ _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1209,7 +1205,7 @@ _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1217,7 +1213,7 @@ _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1225,7 +1221,7 @@ _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1233,7 +1229,7 @@ _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1241,7 +1237,7 @@ _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1249,7 +1245,7 @@ _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1257,7 +1253,7 @@ _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1265,7 +1261,7 @@ _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1273,7 +1269,7 @@ _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1281,7 +1277,7 @@ _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1289,99 +1285,89 @@ _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U, - __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) { - return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A, - (__v8hi) __I /* idx */ , - (__v8hi) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, + (__v8hi) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I, - __mmask16 __U, __m256i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 
+_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, + __m128i __B) { - return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A, - (__v16hi) __I /* idx */ , - (__v16hi) __B, - (__mmask16) __U); + return (__m128i)__builtin_ia32_selectw_128(__U, + (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), + (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, + __m128i __B) { - return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */, - (__v8hi) __A, - (__v8hi) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_selectw_128(__U, + (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), + (__v8hi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I, - __m128i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, + __m128i __B) { - return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */, - (__v8hi) __A, - (__v8hi) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectw_128(__U, + (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), + (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, - __m128i __B) +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) { - return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I/* idx */, - (__v8hi) __A, - (__v8hi) __B, - (__mmask8) __U); + return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, + (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B) +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, + __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */, - (__v16hi) __A, - (__v16hi) __B, - (__mmask16) -1); + return (__m256i)__builtin_ia32_selectw_256(__U, + (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), + (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U, - __m256i __I, __m256i __B) +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, + __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */, - (__v16hi) __A, - (__v16hi) __B, - (__mmask16) __U); + return (__m256i)__builtin_ia32_selectw_256(__U, + (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), + (__v16hi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, - __m256i __I, __m256i __B) +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I, + __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I/* idx */, - (__v16hi) __A, - (__v16hi) __B, - (__mmask16) __U); + return (__m256i)__builtin_ia32_selectw_256(__U, + (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), + (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_maddubs_epi16(__m128i 
__W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1389,402 +1375,400 @@ _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_maddubs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i 
__DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8 (__m128i __A) { - - return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, - (__v16qi) _mm_setzero_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v8hi)__A, __v8qi), + (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A, - (__v16qi) 
_mm_setzero_si128(), - (__mmask16) -1); + return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { - return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A, - (__v16qi) __O, - __M); + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, + (__v16qi)_mm256_cvtepi16_epi8(__A), + (__v16qi)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { - return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A, - (__v16qi) _mm_setzero_si128(), - __M); + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, + (__v16qi)_mm256_cvtepi16_epi8(__A), + (__v16qi)_mm_setzero_si128()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS -_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) +static __inline__ void __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i 
__A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1792,7 +1776,7 @@ _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1800,7 +1784,7 @@ _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i 
__A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1808,7 +1792,7 @@ _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1817,7 +1801,7 @@ _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1825,7 +1809,7 @@ _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1833,7 +1817,7 @@ _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1841,7 +1825,7 @@ _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1850,55 +1834,55 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) } -#define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm_mask_shufflehi_epi16(W, U, A, imm) \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ - (__v8hi)(__m128i)(W)); }) + (__v8hi)(__m128i)(W)) -#define _mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ +#define _mm_maskz_shufflehi_epi16(U, A, imm) \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ - (__v8hi)_mm_setzero_hi()); }) + (__v8hi)_mm_setzero_si128()) -#define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ - (__v16hi)(__m256i)(W)); }) + (__v16hi)(__m256i)(W)) -#define _mm256_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_shufflehi_epi16(U, A, imm) \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ - (__v16hi)_mm256_setzero_si256()); }) + (__v16hi)_mm256_setzero_si256()) -#define _mm_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm_mask_shufflelo_epi16(W, U, A, imm) \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ - (__v8hi)(__m128i)(W)); }) + (__v8hi)(__m128i)(W)) -#define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ +#define _mm_maskz_shufflelo_epi16(U, A, imm) \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ - (__v8hi)_mm_setzero_hi()); }) + (__v8hi)_mm_setzero_si128()) -#define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 
(__v16hi)_mm256_shufflelo_epi16((A), \ (imm)), \ - (__v16hi)(__m256i)(W)); }) + (__v16hi)(__m256i)(W)) -#define _mm256_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_shufflelo_epi16(U, A, imm) \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflelo_epi16((A), \ (imm)), \ - (__v16hi)_mm256_setzero_si256()); }) + (__v16hi)_mm256_setzero_si256()) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1906,7 +1890,7 @@ _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1914,13 +1898,13 @@ _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1928,7 +1912,7 @@ _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1936,7 +1920,7 @@ _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1944,7 +1928,7 @@ _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1952,7 +1936,7 @@ _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1960,7 +1944,7 @@ _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1968,7 +1952,7 @@ _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) (__v16hi)_mm256_setzero_si256()); } -static 
__inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1976,7 +1960,7 @@ _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1984,7 +1968,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1992,7 +1976,7 @@ _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2000,13 +1984,13 @@ _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, int __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2014,7 +1998,7 @@ _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2022,13 +2006,13 @@ _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2036,7 +2020,7 @@ _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2044,13 +2028,13 @@ _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2058,7 +2042,7 @@ _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2066,13 +2050,13 @@ _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2080,7 +2064,7 @@ _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2088,7 +2072,7 @@ _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2096,7 +2080,7 @@ _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2104,7 +2088,7 @@ _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2112,7 +2096,7 @@ _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2120,7 +2104,7 @@ _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2128,7 +2112,7 @@ _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2136,7 +2120,7 @@ _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, int __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i 
__A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2144,7 +2128,7 @@ _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2152,7 +2136,7 @@ _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, int __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2160,7 +2144,7 @@ _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2168,7 +2152,7 @@ _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2176,7 +2160,7 @@ _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2184,7 +2168,7 @@ _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2192,7 +2176,7 @@ _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2200,7 +2184,7 @@ _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2208,7 +2192,7 @@ _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2216,7 +2200,7 @@ _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -2224,15 +2208,15 @@ _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) (__v8hi) __W); } -static __inline__ 
__m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __A, - (__v8hi) _mm_setzero_hi ()); + (__v8hi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -2240,7 +2224,7 @@ _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) (__v16hi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -2248,7 +2232,7 @@ _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) (__v16hi) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -2256,15 +2240,15 @@ _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) (__v16qi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __A, - (__v16qi) _mm_setzero_hi ()); + (__v16qi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -2272,7 +2256,7 @@ _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) (__v32qi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -2281,7 +2265,7 @@ _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, @@ -2289,7 +2273,7 @@ _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) (__v16qi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8 (__mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, @@ -2297,7 +2281,7 @@ _mm_maskz_set1_epi8 (__mmask16 __M, char __A) (__v16qi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, @@ -2305,7 +2289,7 @@ _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) (__v32qi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8 (__mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, @@ -2313,7 +2297,7 @@ _mm256_maskz_set1_epi8 (__mmask32 __M, char __A) (__v32qi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) 
__builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, @@ -2321,16 +2305,16 @@ _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, @@ -2338,7 +2322,7 @@ _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, @@ -2347,7 +2331,7 @@ _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, @@ -2355,7 +2339,7 @@ _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, @@ -2364,7 +2348,7 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, @@ -2372,7 +2356,7 @@ _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) (__mmask32) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, @@ -2380,7 +2364,7 @@ _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) _mm256_setzero_si256 (), (__mmask32) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedquhi128_mask ((__v8hi *) __P, @@ -2388,7 +2372,7 @@ _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) { __builtin_ia32_storedquhi256_mask ((__v16hi *) __P, @@ -2396,7 +2380,7 @@ _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) { __builtin_ia32_storedquqi128_mask ((__v16qi *) __P, @@ -2404,7 +2388,7 @@ _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) {
__builtin_ia32_storedquqi256_mask ((__v32qi *) __P, @@ -2412,162 +2396,162 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) (__mmask32) __U); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_test_epi8_mask (__m128i __A, __m128i __B) { - return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_hi()); + return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_hi()); + _mm_setzero_si128()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_test_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask (__m128i __A, __m128i __B) { - return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_hi()); + return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_hi()); + _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256 ()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_testn_epi8_mask (__m128i __A, __m128i __B) { - return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_hi()); + return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_hi()); + _mm_setzero_si128()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask (__m128i 
__A, __m128i __B) { - return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_hi()); + return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { - return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_hi()); + return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask (__m128i __A) { return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask (__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask (__m256i __A) { return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8 (__mmask16 __A) { return (__m128i) __builtin_ia32_cvtmask2b128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8 (__mmask32 __A) { return (__m256i) __builtin_ia32_cvtmask2b256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2w128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16 (__mmask16 __A) { return (__m256i) __builtin_ia32_cvtmask2w256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, @@ -2575,7 +2559,7 @@ _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) (__v16qi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, @@ -2583,7 +2567,7 @@ _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) (__v16qi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, @@ -2591,7 +2575,7 @@ _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) (__v32qi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, @@ -2599,7 +2583,7 @@ _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) (__v32qi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, @@ -2607,7 +2591,7 @@ _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) (__v8hi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, @@ -2615,7 +2599,7 @@ _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, @@ -2623,7 +2607,7 @@ _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) (__v16hi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, @@ -2631,7 +2615,7 @@ _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) (__v16hi) _mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256 (__M, @@ -2639,7 +2623,7 @@ _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) (__v16hi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16 (__mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256(__M, @@ -2647,7 +2631,7 @@ _mm256_maskz_set1_epi16 (__mmask16 __M, short __A) (__v16hi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, @@ -2655,7 +2639,7 @@ _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) (__v8hi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16 (__mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, @@ -2663,119 +2647,102 @@ _mm_maskz_set1_epi16 (__mmask8 __M, short __A) (__v8hi) _mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16 (__m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, - (__v8hi) __A, - (__v8hi) _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, - (__v8hi) __A, - (__v8hi) _mm_setzero_si128 (), - (__mmask8) __M); + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, + (__v8hi)_mm_permutexvar_epi16(__A, __B), + (__v8hi) 
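/* Editor's note: illustrative sketch, not part of the patch. The
 * permutexvar hunks above are one instance of a refactor that recurs
 * throughout this header: each _mask/_maskz variant is now "unmasked op,
 * then per-lane select" instead of a dedicated *_mask builtin.
 * __builtin_ia32_selectw_128(U, X, Y) takes lane i from X when bit i of U
 * is set, else from Y. A scalar model of both masked forms: */
#include <stdint.h>

static inline void select_epi16_ref(uint8_t u, const uint16_t x[8],
                                    const uint16_t y[8], uint16_t out[8]) {
  for (int i = 0; i < 8; ++i)
    out[i] = ((u >> i) & 1) ? x[i] : y[i];  /* mask bit set -> result lane */
}
/* mask form:  select(U, op(A, B), W)         -- keep W where a U bit is 0
 * maskz form: select(U, op(A, B), zeros)     -- zero where a U bit is 0  */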
_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, - (__v8hi) __A, - (__v8hi) __W, - (__mmask8) __M); + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, + (__v8hi)_mm_permutexvar_epi16(__A, __B), + (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16 (__m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, - (__v16hi) __A, - (__v16hi) _mm256_undefined_si256 (), - (__mmask16) -1); + return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, - (__v16hi) __A, - (__v16hi) _mm256_setzero_si256 (), - (__mmask16) __M); + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, + (__v16hi)_mm256_permutexvar_epi16(__A, __B), + (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, - (__v16hi) __A, - (__v16hi) __W, - (__mmask16) __M); + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, + (__v16hi)_mm256_permutexvar_epi16(__A, __B), + (__v16hi)__W); } -#define _mm_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \ +#define _mm_mask_alignr_epi8(W, U, A, B, N) \ (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ - (__v16qi)(__m128i)(W)); }) + (__v16qi)(__m128i)(W)) -#define _mm_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \ +#define _mm_maskz_alignr_epi8(U, A, B, N) \ (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ - (__v16qi)_mm_setzero_si128()); }) + (__v16qi)_mm_setzero_si128()) -#define _mm256_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \ +#define _mm256_mask_alignr_epi8(W, U, A, B, N) \ (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ - (__v32qi)(__m256i)(W)); }) + (__v32qi)(__m256i)(W)) -#define _mm256_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \ +#define _mm256_maskz_alignr_epi8(U, A, B, N) \ (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ - (__v32qi)_mm256_setzero_si256()); }) - -#define _mm_dbsad_epu8(A, B, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (int)(imm), \ - (__v8hi)_mm_setzero_hi(), \ - (__mmask8)-1); }) - -#define _mm_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (int)(imm), \ - (__v8hi)(__m128i)(W), \ - (__mmask8)(U)); }) - -#define _mm_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (int)(imm), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)); }) - -#define _mm256_dbsad_epu8(A, B, imm) __extension__ ({ \ - 
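/* Editor's note: illustrative sketch, not part of the patch. The macro
 * hunks above and below drop the GNU statement-expression wrapper:
 *     old: #define M(A) __extension__ ({ expr; })
 *     new: #define M(A) expr
 * Statement expressions are a GNU extension and are not usable everywhere
 * an ordinary expression is; since each body is a single cast expression
 * anyway, the plain form is likely strictly more usable. The shape of the
 * change, on a hypothetical macro: */
#define DOUBLE_OLD(x) __extension__ ({ (int)((x) * 2); })  /* GNU-only form */
#define DOUBLE_NEW(x) ((int)((x) * 2))                     /* plain C form  */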
(__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), (int)(imm), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)-1); }) - -#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), (int)(imm), \ - (__v16hi)(__m256i)(W), \ - (__mmask16)(U)); }) - -#define _mm256_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), (int)(imm), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(U)); }) - -#undef __DEFAULT_FN_ATTRS + (__v32qi)_mm256_setzero_si256()) + +#define _mm_dbsad_epu8(A, B, imm) \ + (__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(imm)) + +#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ + (__v8hi)(__m128i)(W)) + +#define _mm_maskz_dbsad_epu8(U, A, B, imm) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ + (__v8hi)_mm_setzero_si128()) + +#define _mm256_dbsad_epu8(A, B, imm) \ + (__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), (int)(imm)) + +#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ + (__v16hi)(__m256i)(W)) + +#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ + (__v16hi)_mm256_setzero_si256()) + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLBWINTRIN_H */ diff --git a/lib/clang/7.0.0/include/avx512vlcdintrin.h b/lib/clang/8.0.0/include/avx512vlcdintrin.h similarity index 57% rename from lib/clang/7.0.0/include/avx512vlcdintrin.h rename to lib/clang/8.0.0/include/avx512vlcdintrin.h index 8f1cd25..903a7c2 100644 --- a/lib/clang/7.0.0/include/avx512vlcdintrin.h +++ b/lib/clang/8.0.0/include/avx512vlcdintrin.h @@ -1,4 +1,4 @@ -/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ---------------------------=== +/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -28,35 +28,36 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmb_epi64 (__mmask8 __A) -{ +{ return (__m128i) _mm_set1_epi64x((long long) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmb_epi64 (__mmask8 __A) { return (__m256i) _mm256_set1_epi64x((long long)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmw_epi32 (__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmw_epi32 (__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, @@ -64,7 +65,7 @@ _mm_conflict_epi64 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, @@ -72,16 +73,16 @@ _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi64 (__m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -89,7 +90,7 @@ _mm256_conflict_epi64 (__m256i __A) (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -97,7 +98,7 @@ _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -105,7 +106,7 @@ _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -113,7 +114,7 @@ _mm_conflict_epi32 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -121,7 +122,7 @@ _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 
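/* Editor's note: illustrative sketch, not part of the patch. The recurring
 * __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS128/256 split adds clang's
 * __min_vector_width__(N) attribute, which declares the minimum vector
 * register width an intrinsic actually needs. As I understand it, this
 * keeps options like -mprefer-vector-width=128 effective: a function that
 * only uses 128-bit intrinsics is no longer conservatively treated as
 * needing wider registers. The shape of the pattern, on a hypothetical
 * header's own attribute macros: */
#define MY_ATTRS128 __attribute__((__always_inline__, __nodebug__, \
                                   __min_vector_width__(128)))
#define MY_ATTRS256 __attribute__((__always_inline__, __nodebug__, \
                                   __min_vector_width__(256)))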
(__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -129,7 +130,7 @@ _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi32 (__m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -137,7 +138,7 @@ _mm256_conflict_epi32 (__m256i __A) (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -145,7 +146,7 @@ _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -154,110 +155,95 @@ _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32 (__m128i __A) { - return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_lzcnt_epi32(__A), + (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_lzcnt_epi32(__A), + (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi32 (__m256i __A) { - return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, - (__v8si) __W, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_lzcnt_epi32(__A), + (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_lzcnt_epi32(__A), + (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 
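/* Editor's note: illustrative sketch, not part of the patch. The lzcnt
 * hunks follow the same refactor as earlier: the unmasked form now calls a
 * plain __builtin_ia32_vplzcntd_* builtin, and the masked forms wrap it in
 * a select. Per-lane semantics, as a scalar reference for one 32-bit lane: */
#include <stdint.h>

static inline uint32_t lzcnt32_ref(uint32_t x) {
  uint32_t n = 0;
  while (n < 32 && !(x & 0x80000000u)) {  /* count zeros from the MSB down */
    ++n;
    x <<= 1;
  }
  return n;  /* lzcnt32_ref(0) == 32, matching VPLZCNTD's defined result */
}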
_mm_lzcnt_epi64 (__m128i __A) { - return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, - (__v2di) - _mm_setzero_di (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, - (__v2di) __W, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_lzcnt_epi64(__A), + (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, - (__v2di) - _mm_setzero_di (), - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_lzcnt_epi64(__A), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi64 (__m256i __A) { - return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, - (__v4di) __W, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_lzcnt_epi64(__A), + (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_lzcnt_epi64(__A), + (__v4di)_mm256_setzero_si256()); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLCDINTRIN_H */ diff --git a/lib/clang/7.0.0/include/avx512vldqintrin.h b/lib/clang/8.0.0/include/avx512vldqintrin.h similarity index 69% rename from lib/clang/7.0.0/include/avx512vldqintrin.h rename to lib/clang/8.0.0/include/avx512vldqintrin.h index d80df9e..9d13846 100644 --- a/lib/clang/7.0.0/include/avx512vldqintrin.h +++ b/lib/clang/8.0.0/include/avx512vldqintrin.h @@ -29,961 +29,953 @@ #define __AVX512VLDQINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi64 (__m256i __A, __m256i __B) { return (__m256i) ((__v4du) __A * (__v4du) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mullo_epi64 (__m128i __A, __m128i __B) { return (__m128i) ((__v2du) __A * (__v2du) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 
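/* Editor's note: illustrative sketch, not part of the patch.
 * _mm256_mullo_epi64 above is just element-wise `*` on clang's vector
 * extension types; the unsigned lane type (__v4du) makes the wraparound
 * well-defined rather than signed overflow. The same idiom on a
 * self-contained vector type: */
typedef unsigned long long u64x2 __attribute__((vector_size(16)));

static inline u64x2 mullo_u64x2(u64x2 a, u64x2 b) {
  return a * b;  /* element-wise multiply; each lane wraps modulo 2^64 */
}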
(__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return 
(__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 
__A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepi64_pd (__m128i __A) { - return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, - (__v2df) _mm_setzero_pd(), - (__mmask8) -1); + return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { - return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, - (__v2df) __W, - (__mmask8) __U); + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_cvtepi64_pd(__A), + (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { - return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, - 
(__v2df) _mm_setzero_pd(), - (__mmask8) __U); + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_cvtepi64_pd(__A), + (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_pd (__m256i __A) { - return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, - (__v4df) _mm256_setzero_pd(), - (__mmask8) -1); + return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { - return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, - (__v4df) __W, - (__mmask8) __U); + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_cvtepi64_pd(__A), + (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { - return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, - (__v4df) _mm256_setzero_pd(), - (__mmask8) __U); + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_cvtepi64_pd(__A), + (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ps (__m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS 
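/* Editor's note: illustrative sketch, not part of the patch. The
 * cvtepi64_pd hunks above replace *_mask conversion builtins with clang's
 * generic __builtin_convertvector(v, DstType), which converts element-wise
 * between vector types of the same lane count (here 2 x i64 -> 2 x double;
 * the later cvtepu64 hunks use an unsigned lane type, __v2du, so the
 * conversion is unsigned). The masked forms then reuse the select pattern.
 * Standalone example: */
typedef long long i64x2 __attribute__((vector_size(16)));
typedef double   f64x2 __attribute__((vector_size(16)));

static inline f64x2 cvt_i64x2_to_f64x2(i64x2 v) {
  return __builtin_convertvector(v, f64x2);  /* per-lane int64 -> double */
}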
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static 
__inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu64_pd (__m128i __A) { - return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, - (__v2df) _mm_setzero_pd(), - (__mmask8) -1); + return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { - return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, - (__v2df) __W, - (__mmask8) __U); + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_cvtepu64_pd(__A), + (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { - return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, - (__v2df) _mm_setzero_pd(), - (__mmask8) __U); + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_cvtepu64_pd(__A), + (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_pd (__m256i __A) { - return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, - (__v4df) _mm256_setzero_pd(), - (__mmask8) -1); + return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { - return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, - (__v4df) __W, - (__mmask8) __U); + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_cvtepu64_pd(__A), + (__v4df)__W); } 
-static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { - return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, - (__v4df) _mm256_setzero_pd(), - (__mmask8) __U); + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_cvtepu64_pd(__A), + (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ps (__m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -#define _mm_range_pd(A, B, C) __extension__ ({ \ +#define _mm_range_pd(A, B, C) \ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_range_pd(W, U, A, B, C) \ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_range_pd(U, A, B, C) \ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_range_pd(A, B, C) __extension__ ({ \ +#define _mm256_range_pd(A, B, C) \ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm256_mask_range_pd(W, U, A, B, C) \ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \ +#define _mm256_maskz_range_pd(U, A, B, C) \ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_range_ps(A, B, C) __extension__ ({ \ +#define _mm_range_ps(A, B, C) \ 
(__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_range_ps(W, U, A, B, C) \ (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ - (__v4sf)(__m128)(W), (__mmask8)(U)); }) + (__v4sf)(__m128)(W), (__mmask8)(U)) -#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_range_ps(U, A, B, C) \ (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_range_ps(A, B, C) __extension__ ({ \ +#define _mm256_range_ps(A, B, C) \ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm256_mask_range_ps(W, U, A, B, C) \ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)(__m256)(W), (__mmask8)(U)); }) + (__v8sf)(__m256)(W), (__mmask8)(U)) -#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \ +#define _mm256_maskz_range_ps(U, A, B, C) \ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_reduce_pd(A, B) __extension__ ({ \ +#define _mm_reduce_pd(A, B) \ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \ +#define _mm_mask_reduce_pd(W, U, A, B) \ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \ +#define _mm_maskz_reduce_pd(U, A, B) \ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_reduce_pd(A, B) __extension__ ({ \ +#define _mm256_reduce_pd(A, B) \ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \ +#define _mm256_mask_reduce_pd(W, U, A, B) \ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \ +#define _mm256_maskz_reduce_pd(U, A, B) \ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_reduce_ps(A, B) __extension__ ({ \ +#define _mm_reduce_ps(A, B) \ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \ +#define _mm_mask_reduce_ps(W, U, A, B) \ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \ +#define _mm_maskz_reduce_ps(U, A, B) \ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 
(__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_reduce_ps(A, B) __extension__ ({ \ +#define _mm256_reduce_ps(A, B) \ (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \ +#define _mm256_mask_reduce_ps(W, U, A, B) \ (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)(__m256)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \ +#define _mm256_maskz_reduce_ps(U, A, B) \ (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi32_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi32 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2d128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi32 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2d256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi64 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2q128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi64 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2q256 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi64_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi64_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x2 (__m128 __A) { - return (__m256)__builtin_shufflevector((__v4sf)__A, - (__v4sf)_mm_undefined_ps(), + return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -991,7 +983,7 @@ _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -999,14 +991,14 @@ _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcast_f64x2(__m128d __A) { return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 
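/* Editor's note: illustrative sketch, not part of the patch. The
 * broadcast_f32x2 / broadcast_i32x2 hunks pass __A as *both* operands of
 * __builtin_shufflevector instead of an _mm*_undefined_* value. Indices
 * 0..N-1 pick lanes from the first operand and N..2N-1 from the second;
 * every index used here is 0 or 1, so the second operand never matters and
 * repeating __A avoids consuming an undefined input. Standalone example: */
typedef float f32x4 __attribute__((vector_size(16)));
typedef float f32x8 __attribute__((vector_size(32)));

static inline f32x8 broadcast_f32x2_ref(f32x4 a) {
  /* replicate lanes {a[0], a[1]} four times across a 256-bit vector */
  return __builtin_shufflevector(a, a, 0, 1, 0, 1, 0, 1, 0, 1);
}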
_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, @@ -1014,7 +1006,7 @@ _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) (__v4df)__O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, @@ -1022,15 +1014,14 @@ _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcast_i32x2 (__m128i __A) { - return (__m128i)__builtin_shufflevector((__v4si)__A, - (__v4si)_mm_undefined_si128(), + return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -1038,7 +1029,7 @@ _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -1046,15 +1037,14 @@ _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x2 (__m128i __A) { - return (__m256i)__builtin_shufflevector((__v4si)__A, - (__v4si)_mm_undefined_si128(), + return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -1062,7 +1052,7 @@ _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -1070,14 +1060,14 @@ _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i64x2(__m128i __A) { return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -1085,7 +1075,7 @@ _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) (__v4di)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -1093,106 +1083,103 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) (__v4di)_mm256_setzero_si256()); } -#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \ - (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \ - (__v4df)_mm256_undefined_pd(), \ - ((imm) & 1) ? 2 : 0, \ - ((imm) & 1) ? 
3 : 1); }) - -#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ - (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ - (__v2df)(W)); }) - -#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ - (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ - (__v2df)_mm_setzero_pd()); }) - -#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \ - (__v4di)_mm256_undefined_si256(), \ - ((imm) & 1) ? 2 : 0, \ - ((imm) & 1) ? 3 : 1); }) - -#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ - (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ - (__v2di)(W)); }) - -#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ - (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ - (__v2di)_mm_setzero_di()); }) - -#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(A), \ - (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \ - ((imm) & 0x1) ? 0 : 4, \ - ((imm) & 0x1) ? 1 : 5, \ - ((imm) & 0x1) ? 4 : 2, \ - ((imm) & 0x1) ? 5 : 3); }) - -#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_extractf64x2_pd(A, imm) \ + (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v2df)_mm_undefined_pd(), \ + (__mmask8)-1) + +#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ + (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U)) + +#define _mm256_maskz_extractf64x2_pd(U, A, imm) \ + (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U)) + +#define _mm256_extracti64x2_epi64(A, imm) \ + (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ + (int)(imm), \ + (__v2di)_mm_undefined_si128(), \ + (__mmask8)-1) + +#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ + (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ + (int)(imm), \ + (__v2di)(__m128i)(W), \ + (__mmask8)(U)) + +#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \ + (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ + (int)(imm), \ + (__v2di)_mm_setzero_si128(), \ + (__mmask8)(U)) + +#define _mm256_insertf64x2(A, B, imm) \ + (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \ + (__v2df)(__m128d)(B), (int)(imm)) + +#define _mm256_mask_insertf64x2(W, U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ - (__v4df)(W)); }) + (__v4df)(__m256d)(W)) -#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_insertf64x2(U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v4di)(A), \ - (__v4di)_mm256_castsi128_si256((__m128i)(B)), \ - ((imm) & 0x1) ? 0 : 4, \ - ((imm) & 0x1) ? 1 : 5, \ - ((imm) & 0x1) ? 4 : 2, \ - ((imm) & 0x1) ? 
                                   5 : 3); })
+#define _mm256_inserti64x2(A, B, imm) \
+  (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
+                                          (__v2di)(__m128i)(B), (int)(imm))

-#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
+#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                  (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
-                                 (__v4di)(W)); })
+                                 (__v4di)(__m256i)(W))

-#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
+#define _mm256_maskz_inserti64x2(U, A, B, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                  (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
-                                 (__v4di)_mm256_setzero_si256()); })
+                                 (__v4di)_mm256_setzero_si256())

-#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
+#define _mm_mask_fpclass_pd_mask(U, A, imm) \
   (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
-                                             (__mmask8)(U)); })
+                                             (__mmask8)(U))

-#define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \
+#define _mm_fpclass_pd_mask(A, imm) \
   (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
-                                             (__mmask8)-1); })
+                                             (__mmask8)-1)

-#define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
+#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
   (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
-                                             (__mmask8)(U)); })
+                                             (__mmask8)(U))

-#define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \
+#define _mm256_fpclass_pd_mask(A, imm) \
   (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
-                                             (__mmask8)-1); })
+                                             (__mmask8)-1)

-#define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
+#define _mm_mask_fpclass_ps_mask(U, A, imm) \
   (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
-                                             (__mmask8)(U)); })
+                                             (__mmask8)(U))

-#define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \
+#define _mm_fpclass_ps_mask(A, imm) \
   (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
-                                             (__mmask8)-1); })
+                                             (__mmask8)-1)

-#define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
+#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
   (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
-                                             (__mmask8)(U)); })
+                                             (__mmask8)(U))

-#define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \
+#define _mm256_fpclass_ps_mask(A, imm) \
   (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
-                                             (__mmask8)-1); })
+                                             (__mmask8)-1)

-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256

 #endif
diff --git a/lib/clang/7.0.0/include/avx512vlintrin.h b/lib/clang/8.0.0/include/avx512vlintrin.h
similarity index 61%
rename from lib/clang/7.0.0/include/avx512vlintrin.h
rename to lib/clang/8.0.0/include/avx512vlintrin.h
index 16e44c3..0ee1d00 100644
--- a/lib/clang/7.0.0/include/avx512vlintrin.h
+++ b/lib/clang/8.0.0/include/avx512vlintrin.h
@@ -28,13 +28,12 @@
 #ifndef __AVX512VLINTRIN_H
 #define __AVX512VLINTRIN_H

-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))

-/* Doesn't require avx512vl, used in avx512dqintrin.h */
-static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
-_mm_setzero_di(void) {
-  return (__m128i)(__v2di){ 0LL, 0LL};
-}
+typedef short __v2hi __attribute__((__vector_size__(4)));
+typedef char __v4qi __attribute__((__vector_size__(4)));
+typedef char __v2qi __attribute__((__vector_size__(2)));

 /* Integer compare */

@@ -238,7 +237,7 @@ _mm_setzero_di(void) {
 #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)

-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -246,7 +245,7 @@ _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                              (__v8si)__W);
 }

-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -254,7 +253,7 @@ _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
                                              (__v8si)_mm256_setzero_si256());
 }

-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -262,7 +261,7 @@ _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                              (__v4di)__W);
 }

-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -270,7 +269,7 @@ _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
                                              (__v4di)_mm256_setzero_si256());
 }

-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -278,7 +277,7 @@ _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                              (__v8si)__W);
 }

-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -286,7 +285,7 @@ _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
                                              (__v8si)_mm256_setzero_si256());
 }

-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -294,7 +293,7 @@ _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                              (__v4di)__W);
 }

-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -302,7 +301,7 @@ _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
                                              (__v4di)_mm256_setzero_si256());
 }

-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -310,7 +309,7 @@ _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                             (__v4si)__W);
 }

-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_add_epi32(__mmask8 __U,
__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -318,7 +317,7 @@ _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -326,7 +325,7 @@ _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -334,7 +333,7 @@ _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -342,7 +341,7 @@ _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -350,7 +349,7 @@ _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -358,7 +357,7 @@ _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -366,7 +365,7 @@ _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -374,7 +373,7 @@ _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -382,7 +381,7 @@ _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -390,7 +389,7 @@ _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -398,7 +397,7 @@ _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i 
__DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -406,7 +405,7 @@ _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -414,7 +413,7 @@ _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -422,7 +421,7 @@ _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -430,7 +429,7 @@ _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -438,7 +437,7 @@ _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -446,7 +445,7 @@ _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -454,7 +453,7 @@ _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -462,7 +461,7 @@ _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -470,13 +469,13 @@ _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -484,13 +483,13 @@ _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i 
__A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -498,14 +497,14 @@ _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -513,13 +512,13 @@ _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -527,13 +526,13 @@ _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -541,13 +540,13 @@ _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -555,13 +554,13 @@ _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -570,13 +569,13 @@ _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -584,13 +583,13 @@ _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -598,13 +597,13 @@ _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -612,14 +611,14 @@ _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -627,13 +626,13 @@ _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -641,13 +640,13 @@ _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -655,13 +654,13 @@ _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return 
(__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -669,13 +668,13 @@ _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -684,909 +683,973 @@ _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); } -#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epi32_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epi32_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epu32_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epu32_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epi32_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epu32_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epi64_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epi64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define 
_mm_cmp_epu64_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epu64_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epu64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epi64_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epu64_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_ps_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_ps_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_pd_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_pd_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_ps_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_ps_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_pd_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_pd_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, - (__v2df) __B, - 
(__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddpd ((__v2df) __A, + (__v2df) __B, + (__v2df) __C), + (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddpd ((__v2df) __A, + (__v2df) __B, + (__v2df) __C), + (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddpd ((__v2df) __A, + (__v2df) __B, + (__v2df) __C), + (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddpd ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C), + (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddpd ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C), + (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddpd (-(__v2df) __A, + (__v2df) __B, + (__v2df) __C), + (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddpd (-(__v2df) __A, + (__v2df) __B, + (__v2df) __C), + (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, - (__v2df) __B, - -(__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddpd (-(__v2df) __A, + (__v2df) __B, + -(__v2df) __C), + (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) 
__builtin_ia32_vfmaddpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddpd256 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C), + (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddpd256 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C), + (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddpd256 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C), + (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddpd256 ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C), + (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddpd256 ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C), + (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddpd256 (-(__v4df) __A, + (__v4df) __B, + (__v4df) __C), + (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddpd256 (-(__v4df) __A, + (__v4df) __B, + (__v4df) __C), + (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, - (__v4df) __B, - -(__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddpd256 (-(__v4df) __A, + (__v4df) __B, + -(__v4df) __C), + (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 
__DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, + __builtin_ia32_vfmaddps ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C), + (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, + __builtin_ia32_vfmaddps ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C), + (__v4sf) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, + __builtin_ia32_vfmaddps ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C), + (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, + __builtin_ia32_vfmaddps ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C), + (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, + __builtin_ia32_vfmaddps ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C), + (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, + __builtin_ia32_vfmaddps (-(__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C), + (__v4sf) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, + __builtin_ia32_vfmaddps (-(__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C), + (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, + __builtin_ia32_vfmaddps (-(__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C), + (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 
__DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, + __builtin_ia32_vfmaddps256 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C), + (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, + __builtin_ia32_vfmaddps256 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C), + (__v8sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, + __builtin_ia32_vfmaddps256 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C), + (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, + __builtin_ia32_vfmaddps256 ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C), + (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, + __builtin_ia32_vfmaddps256 ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C), + (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, + __builtin_ia32_vfmaddps256 (-(__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C), + (__v8sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, + __builtin_ia32_vfmaddps256 (-(__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C), + (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, + __builtin_ia32_vfmaddps256 (-(__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C), + (__v8sf)_mm256_setzero_ps()); } -static __inline__ 
__m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddsubpd ((__v2df) __A, + (__v2df) __B, + (__v2df) __C), + (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) - __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddsubpd ((__v2df) __A, + (__v2df) __B, + (__v2df) __C), + (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) - __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddsubpd ((__v2df) __A, + (__v2df) __B, + (__v2df) __C), + (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddsubpd ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C), + (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C, - (__mmask8) - __U); + return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, + __builtin_ia32_vfmaddsubpd ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C), + (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C), + (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) - __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C), + (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) - __U); + return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, + 
__builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
+                                                  (__v4df) __B,
+                                                  (__v4df) __C),
+                                     (__v4df)_mm256_setzero_pd());
 }

[Condensed: the remainder of this stretch of the AVX-512VL intrinsics header
diff (running through hunk @@ -2654,643 +2709,579 @@) is mechanical and highly
repetitive, so it is summarized here. Two transformations repeat across a long
run of intrinsic definitions:

 1. Every "static __inline__ <type> __DEFAULT_FN_ATTRS" definition is retagged
    __DEFAULT_FN_ATTRS128 or __DEFAULT_FN_ATTRS256 according to the vector
    width it operates on; the 256-bit tag is also used for intrinsics such as
    _mm256_cvtpd_ps that narrow a 256-bit input to a 128-bit result.

 2. Single-purpose masked builtins are replaced by the unmasked operation
    wrapped in __builtin_ia32_select{ps,pd,d,q}_{128,256}(__U, op, fallback),
    where the fallback is __W or __A for mask_ forms, __C for mask3_ forms,
    and the matching _mm[256]_setzero_* for maskz_ forms. This rewrites the
    fmaddsub/fmsubadd/fmsub/fnmadd/fnmsub family (negating __B and/or __C as
    each variant requires), abs_epi64 (now built on the unmasked
    __builtin_ia32_pabsq{128,256}), and cvtepu32_ps (now built on
    __builtin_convertvector from __v4su/__v8su to __v4sf/__v8sf).

Intrinsics whose bodies already used the select builtins or that keep their
_mask builtins (add/div/mul/max/min pd and ps; blend; abs_epi32; max_epi32;
the cvtepi32, cvtepu32_pd, cvtpd, cvtps, cvttpd and cvttps conversions;
compress, compressstoreu, expand, expandloadu; getexp) change only in their
attribute tag. A few __mmask16 -> __mmask8 type fixes ride along:
_mm_maskz_cvtepi32_ps and _mm256_maskz_cvtepi32_ps now take __mmask8, and
_mm256_mask_abs_epi32/_mm256_maskz_abs_epi32 cast __U to (__mmask8).
Two representative hunks:]

-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
-  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
-                                                       (__v4df) __B,
-                                                       -(__v4df) __C,
-                                                       (__mmask8) __U);
+  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+                      __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     -(__v4df) __C),
+                      (__v4df) __A);
 }

-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtepu32_ps (__m128i __A) {
-  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
-                                                    (__v4sf)
-                                                    _mm_setzero_ps (),
-                                                    (__mmask8) -1);
+  return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
 }
(__mmask8 __M, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_si128 (), - __M); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_max_epi64 (__m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, - __m128i __B) { - return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __W, __M); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_max_epi64(__A, __B), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_max_epi64 (__m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) -1); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_max_epi64(__A, __B), + (__v2di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - __M); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_max_epi64 (__m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, - __m256i __B) { - return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __W, __M); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_max_epi64(__A, __B), + (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_max_epi64 (__m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_max_epi64(__A, __B), + (__v4di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_si128 (), - __M); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64 (__m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, - __m128i __B) { - return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __W, __M); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_max_epu64(__A, __B), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - __M); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_max_epu64(__A, __B), + (__v2di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64 (__m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_max_epu64(__A, __B), + (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, - __m256i __B) { - return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __W, __M); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_max_epu64(__A, __B), + (__v4di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64 (__m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, - __m128i __B) { - return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __W, __M); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_min_epi64(__A, __B), + (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_min_epi64(__A, __B), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64 (__m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, - __m256i __B) { - return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __W, __M); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_min_epi64(__A, __B), + (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_min_epi64(__A, __B), + (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, 
__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64 (__m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, - __m128i __B) { - return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __W, __M); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_min_epu64(__A, __B), + (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_min_epu64(__A, __B), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64 (__m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, - __m256i __B) { - return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __W, __M); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_min_epu64(__A, __B), + (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_min_epu64(__A, __B), + (__v4di)_mm256_setzero_si256()); } -#define _mm_roundscale_pd(A, imm) __extension__ ({ \ +#define _mm_roundscale_pd(A, imm) \ (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ +#define _mm_mask_roundscale_pd(W, U, A, imm) \ (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ +#define _mm_maskz_roundscale_pd(U, A, imm) \ 
(__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_roundscale_pd(A, imm) __extension__ ({ \ +#define _mm256_roundscale_pd(A, imm) \ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_roundscale_pd(W, U, A, imm) \ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_roundscale_pd(U, A, imm) \ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_roundscale_ps(A, imm) __extension__ ({ \ +#define _mm_roundscale_ps(A, imm) \ (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ +#define _mm_mask_roundscale_ps(W, U, A, imm) \ (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ +#define _mm_maskz_roundscale_ps(U, A, imm) \ (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_roundscale_ps(A, imm) __extension__ ({ \ +#define _mm256_roundscale_ps(A, imm) \ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_roundscale_ps(W, U, A, imm) \ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)(__m256)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_roundscale_ps(U, A, imm) \ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, @@ -3299,1258 +3290,1125 @@ _mm_scalef_pd (__m128d __A, __m128d __B) { (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, - (__v2df) __W, - (__mmask8) __U); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_scalef_pd (__m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) -1); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS 
-_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, - __m256d __B) { - return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) __W, - (__mmask8) __U); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_scalef_ps (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) -1); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __W, - (__mmask8) __U); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_scalef_ps (__m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, - __m256 __B) { - return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __W, - (__mmask8) __U); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); -} - -#define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \ - (__v2di)(__m128i)(index), \ - (__v2df)(__m128d)(v1), (int)(scale)); }) - -#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \ - (__v2di)(__m128i)(index), \ - (__v2df)(__m128d)(v1), (int)(scale)); }) - -#define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \ - (__v2di)(__m128i)(index), \ - (__v2di)(__m128i)(v1), (int)(scale)); }) - -#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \ - (__v2di)(__m128i)(index), \ - (__v2di)(__m128i)(v1), (int)(scale)); }) - -#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \ - (__v4di)(__m256i)(index), \ - (__v4df)(__m256d)(v1), (int)(scale)); }) - -#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \ - (__v4di)(__m256i)(index), \ - (__v4df)(__m256d)(v1), (int)(scale)); }) - -#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \ - (__v4di)(__m256i)(index), \ - (__v4di)(__m256i)(v1), (int)(scale)); }) - -#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv4di((long 
long *)(addr), (__mmask8)(mask), \ - (__v4di)(__m256i)(index), \ - (__v4di)(__m256i)(v1), (int)(scale)); }) - -#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \ - (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) - -#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \ - (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) - -#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \ - (__v2di)(__m128i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) - -#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \ - (__v2di)(__m128i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) - -#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \ - (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) - -#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \ - (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) - -#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \ - (__v4di)(__m256i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) - -#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \ - (__v4di)(__m256i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) - -#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v2df)(__m128d)(v1), (int)(scale)); }) - -#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v2df)(__m128d)(v1), (int)(scale)); }) - -#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v2di)(__m128i)(v1), (int)(scale)); }) - -#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v2di)(__m128i)(v1), (int)(scale)); }) - -#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v4df)(__m256d)(v1), (int)(scale)); }) - -#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v4df)(__m256d)(v1), (int)(scale)); }) - -#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v4di)(__m256i)(v1), (int)(scale)); }) - -#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \ 
- (__v4si)(__m128i)(index), \ - (__v4di)(__m256i)(v1), (int)(scale)); }) - -#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) - -#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) - -#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) - -#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) - -#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \ - (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ - (int)(scale)); }) - -#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \ - (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ - (int)(scale)); }) - -#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \ - (__v8si)(__m256i)(index), \ - (__v8si)(__m256i)(v1), (int)(scale)); }) - -#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \ - (__v8si)(__m256i)(index), \ - (__v8si)(__m256i)(v1), (int)(scale)); }) - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sqrt_pd(__A), - (__v2df)__W); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sqrt_pd(__A), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sqrt_pd(__A), - (__v4df)__W); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sqrt_pd(__A), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)__W); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)_mm_setzero_pd()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sqrt_ps(__A), - (__v8sf)__W); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sqrt_ps(__A), - 
(__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sub_pd(__A, __B), - (__v2df)__W); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sub_pd(__A, __B), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sub_pd(__A, __B), - (__v4df)__W); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sub_pd(__A, __B), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sub_ps(__A, __B), - (__v4sf)__W); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sub_ps(__A, __B), - (__v4sf)_mm_setzero_ps()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sub_ps(__A, __B), - (__v8sf)__W); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sub_ps(__A, __B), - (__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U, - __m128i __B) { - return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A, - (__v4si) __I - /* idx */ , - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I, - __mmask8 __U, __m256i __B) { - return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A, - (__v8si) __I - /* idx */ , - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U, - __m128d __B) { - return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A, - (__v2di) __I - /* idx */ , - (__v2df) __B, - (__mmask8) - __U); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U, - __m256d __B) { - return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A, - (__v4di) __I - /* idx */ , - (__v4df) __B, - (__mmask8) - __U); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U, - __m128 __B) { - return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A, - (__v4si) __I - /* idx */ , - (__v4sf) __B, - (__mmask8) __U); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U, - __m256 __B) { - return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A, - (__v8si) __I - /* idx */ , - (__v8sf) __B, - (__mmask8) __U); -} - -static __inline__ 
__m128i __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U, - __m128i __B) { - return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A, - (__v2di) __I - /* idx */ , - (__v2di) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I, - __mmask8 __U, __m256i __B) { - return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A, - (__v4di) __I - /* idx */ , - (__v4di) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) { - return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I - /* idx */ , - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I, - __m128i __B) { - return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I - /* idx */ , - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I, - __m128i __B) { - return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I - /* idx */ , - (__v4si) __A, - (__v4si) __B, - (__mmask8) - __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I - /* idx */ , - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I, - __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I - /* idx */ , - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A, - __m256i __I, __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I - /* idx */ , - (__v8si) __A, - (__v8si) __B, - (__mmask8) - __U); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) { - return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I - /* idx */ , - (__v2df) __A, - (__v2df) __B, - (__mmask8) - - 1); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I, - __m128d __B) { - return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I - /* idx */ , - (__v2df) __A, - (__v2df) __B, - (__mmask8) - __U); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I, - __m128d __B) { - return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I - /* idx */ , - (__v2df) __A, - (__v2df) __B, - (__mmask8) - __U); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) { - return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I - /* idx */ , - (__v4df) __A, - (__v4df) __B, - (__mmask8) - - 1); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I, - __m256d __B) { - return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I - /* idx */ , - (__v4df) __A, - (__v4df) __B, - (__mmask8) - __U); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, 
__m256i __I, - __m256d __B) { - return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I - /* idx */ , - (__v4df) __A, - (__v4df) __B, - (__mmask8) - __U); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) { - return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I - /* idx */ , - (__v4sf) __A, - (__v4sf) __B, - (__mmask8) -1); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I, - __m128 __B) { - return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I - /* idx */ , - (__v4sf) __A, - (__v4sf) __B, - (__mmask8) __U); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I, - __m128 __B) { - return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I - /* idx */ , - (__v4sf) __A, - (__v4sf) __B, - (__mmask8) - __U); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) { - return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I - /* idx */ , - (__v8sf) __A, - (__v8sf) __B, - (__mmask8) -1); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I, - __m256 __B) { - return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I - /* idx */ , - (__v8sf) __A, - (__v8sf) __B, - (__mmask8) __U); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I, - __m256 __B) { - return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I - /* idx */ , - (__v8sf) __A, - (__v8sf) __B, - (__mmask8) - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) { - return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I - /* idx */ , - (__v2di) __A, - (__v2di) __B, - (__mmask8) -1); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I, - __m128i __B) { - return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I - /* idx */ , - (__v2di) __A, - (__v2di) __B, - (__mmask8) __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I, - __m128i __B) { - return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I - /* idx */ , - (__v2di) __A, - (__v2di) __B, - (__mmask8) - __U); -} - - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I - /* idx */ , - (__v4di) __A, - (__v4di) __B, - (__mmask8) -1); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I, - __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I - /* idx */ , - (__v4di) __A, - (__v4di) __B, - (__mmask8) __U); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A, - __m256i __I, __m256i __B) { - return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I - /* idx */ , - (__v4di) __A, - (__v4di) __B, - (__mmask8) - __U); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepi8_epi32(__A), - 
(__v4si)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepi8_epi32(__A), - (__v4si)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepi8_epi32(__A), - (__v8si)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepi8_epi32(__A), - (__v8si)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi8_epi64(__A), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi8_epi64(__A), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi8_epi64(__A), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi8_epi64(__A), - (__v4di)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi32_epi64(__X), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi32_epi64(__X), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi32_epi64(__X), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi32_epi64(__X), - (__v4di)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepi16_epi32(__A), - (__v4si)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepi16_epi32(__A), - (__v4si)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepi16_epi32(__A), - (__v8si)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepi16_epi32(__A), - (__v8si)_mm256_setzero_si256()); -} - 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi16_epi64(__A), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi16_epi64(__A), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi16_epi64(__A), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi16_epi64(__A), - (__v4di)_mm256_setzero_si256()); -} - - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepu8_epi32(__A), - (__v4si)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepu8_epi32(__A), - (__v4si)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepu8_epi32(__A), - (__v8si)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepu8_epi32(__A), - (__v8si)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu8_epi64(__A), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu8_epi64(__A), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu8_epi64(__A), - (__v4di)__W); + (__v2df) __W, + (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu8_epi64(__A), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m128d __DEFAULT_FN_ATTRS128 +_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu32_epi64(__X), - (__v2di)__W); +static __inline__ __m256d __DEFAULT_FN_ATTRS256 +_mm256_scalef_pd (__m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) -1); } 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu32_epi64(__X), - (__v2di)_mm_setzero_si128()); +static __inline__ __m256d __DEFAULT_FN_ATTRS256 +_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, + __m256d __B) { + return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __W, + (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu32_epi64(__X), - (__v4di)__W); +static __inline__ __m256d __DEFAULT_FN_ATTRS256 +_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu32_epi64(__X), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_scalef_ps (__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepu16_epi32(__A), - (__v4si)__W); +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepu16_epi32(__A), - (__v4si)_mm_setzero_si128()); +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepu16_epi32(__A), - (__v8si)__W); +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_scalef_ps (__m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepu16_epi32(__A), - (__v8si)_mm256_setzero_si256()); +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, + __m256 __B) { + return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __W, + (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu16_epi64(__A), - (__v2di)__W); +static __inline__ __m256 
__DEFAULT_FN_ATTRS256 +_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu16_epi64(__A), - (__v2di)_mm_setzero_si128()); -} +#define _mm_i64scatter_pd(addr, index, v1, scale) \ + __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \ + (__v2di)(__m128i)(index), \ + (__v2df)(__m128d)(v1), (int)(scale)) -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu16_epi64(__A), - (__v4di)__W); -} +#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \ + __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \ + (__v2di)(__m128i)(index), \ + (__v2df)(__m128d)(v1), (int)(scale)) -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu16_epi64(__A), - (__v4di)_mm256_setzero_si256()); -} +#define _mm_i64scatter_epi64(addr, index, v1, scale) \ + __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \ + (__v2di)(__m128i)(index), \ + (__v2di)(__m128i)(v1), (int)(scale)) +#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ + __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \ + (__v2di)(__m128i)(index), \ + (__v2di)(__m128i)(v1), (int)(scale)) -#define _mm_rol_epi32(a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)-1); }) +#define _mm256_i64scatter_pd(addr, index, v1, scale) \ + __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \ + (__v4di)(__m256i)(index), \ + (__v4df)(__m256d)(v1), (int)(scale)) -#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ - (__v4si)(__m128i)(w), (__mmask8)(u)); }) +#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \ + __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \ + (__v4di)(__m256i)(index), \ + (__v4df)(__m256d)(v1), (int)(scale)) -#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(u)); }) +#define _mm256_i64scatter_epi64(addr, index, v1, scale) \ + __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \ + (__v4di)(__m256i)(index), \ + (__v4di)(__m256i)(v1), (int)(scale)) -#define _mm256_rol_epi32(a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1); }) +#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ + __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \ + (__v4di)(__m256i)(index), \ + (__v4di)(__m256i)(v1), (int)(scale)) -#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ - (__v8si)(__m256i)(w), (__mmask8)(u)); }) +#define _mm_i64scatter_ps(addr, index, v1, scale) \ + __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \ + 
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ + (int)(scale)) -#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(u)); }) +#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ + __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \ + (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ + (int)(scale)) -#define _mm_rol_epi64(a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ - (__v2di)_mm_setzero_di(), \ - (__mmask8)-1); }) +#define _mm_i64scatter_epi32(addr, index, v1, scale) \ + __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \ + (__v2di)(__m128i)(index), \ + (__v4si)(__m128i)(v1), (int)(scale)) -#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ - (__v2di)(__m128i)(w), (__mmask8)(u)); }) +#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ + __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \ + (__v2di)(__m128i)(index), \ + (__v4si)(__m128i)(v1), (int)(scale)) -#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ - (__v2di)_mm_setzero_di(), \ - (__mmask8)(u)); }) +#define _mm256_i64scatter_ps(addr, index, v1, scale) \ + __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \ + (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ + (int)(scale)) -#define _mm256_rol_epi64(a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)-1); }) +#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ + __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \ + (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ + (int)(scale)) -#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ - (__v4di)(__m256i)(w), (__mmask8)(u)); }) +#define _mm256_i64scatter_epi32(addr, index, v1, scale) \ + __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \ + (__v4di)(__m256i)(index), \ + (__v4si)(__m128i)(v1), (int)(scale)) -#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(u)); }) +#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ + __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \ + (__v4di)(__m256i)(index), \ + (__v4si)(__m128i)(v1), (int)(scale)) -static __inline__ __m128i __DEFAULT_FN_ATTRS +#define _mm_i32scatter_pd(addr, index, v1, scale) \ + __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \ + (__v4si)(__m128i)(index), \ + (__v2df)(__m128d)(v1), (int)(scale)) + +#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \ + (__v4si)(__m128i)(index), \ + (__v2df)(__m128d)(v1), (int)(scale)) + +#define _mm_i32scatter_epi64(addr, index, v1, scale) \ + __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \ + (__v4si)(__m128i)(index), \ + (__v2di)(__m128i)(v1), (int)(scale)) + +#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \ + (__v4si)(__m128i)(index), \ + (__v2di)(__m128i)(v1), (int)(scale)) + 
+#define _mm256_i32scatter_pd(addr, index, v1, scale) \
+  __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
+                               (__v4si)(__m128i)(index), \
+                               (__v4df)(__m256d)(v1), (int)(scale))
+
+#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
+  __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
+                               (__v4si)(__m128i)(index), \
+                               (__v4df)(__m256d)(v1), (int)(scale))
+
+#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
+  __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
+                               (__v4si)(__m128i)(index), \
+                               (__v4di)(__m256i)(v1), (int)(scale))
+
+#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
+  __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
+                               (__v4si)(__m128i)(index), \
+                               (__v4di)(__m256i)(v1), (int)(scale))
+
+#define _mm_i32scatter_ps(addr, index, v1, scale) \
+  __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
+                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
+                               (int)(scale))
+
+#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
+  __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
+                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
+                               (int)(scale))
+
+#define _mm_i32scatter_epi32(addr, index, v1, scale) \
+  __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
+                               (__v4si)(__m128i)(index), \
+                               (__v4si)(__m128i)(v1), (int)(scale))
+
+#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
+  __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
+                               (__v4si)(__m128i)(index), \
+                               (__v4si)(__m128i)(v1), (int)(scale))
+
+#define _mm256_i32scatter_ps(addr, index, v1, scale) \
+  __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
+                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
+                               (int)(scale))
+
+#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
+  __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
+                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
+                               (int)(scale))
+
+#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
+  __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
+                               (__v8si)(__m256i)(index), \
+                               (__v8si)(__m256i)(v1), (int)(scale))
+
+#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
+  __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
+                               (__v8si)(__m256i)(index), \
+                               (__v8si)(__m256i)(v1), (int)(scale))
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_sqrt_pd(__A),
+                                              (__v2df)__W);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_sqrt_pd(__A),
+                                              (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_sqrt_pd(__A),
+                                              (__v4df)__W);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_sqrt_pd(__A),
+                                              (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_sqrt_ps(__A),
+                                             (__v4sf)__W);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_sqrt_ps(__A),
+                                             (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_sqrt_ps(__A),
+                                             (__v8sf)__W);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_sqrt_ps(__A),
+                                             (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_sub_pd(__A, __B),
+                                              (__v2df)__W);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_sub_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_sub_pd(__A, __B),
+                                              (__v4df)__W);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_sub_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_sub_ps(__A, __B),
+                                             (__v4sf)__W);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_sub_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_sub_ps(__A, __B),
+                                             (__v8sf)__W);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_sub_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
+  return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
+                                                (__v4si)__B);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
+                            __m128i __B) {
+  return (__m128i)__builtin_ia32_selectd_128(__U,
+                                  (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
+                                  (__v4si)__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
+                             __m128i __B) {
+  return (__m128i)__builtin_ia32_selectd_128(__U,
+                                  (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
+                                  (__v4si)__I);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
+                             __m128i __B) {
+  return (__m128i)__builtin_ia32_selectd_128(__U,
+                                  (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
+                                  (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
+  return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
+                                                (__v8si) __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
+                               __m256i __B) {
+  return (__m256i)__builtin_ia32_selectd_256(__U,
+                               (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
+                               (__v8si)__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
+                                __m256i __B) {
+  return (__m256i)__builtin_ia32_selectd_256(__U,
+                               (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
+                               (__v8si)__I);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
+                                __m256i __B) {
+  return (__m256i)__builtin_ia32_selectd_256(__U,
+                               (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
+                               (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
+  return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
+                                                 (__v2df)__B);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128(__U,
+                                     (__v2df)_mm_permutex2var_pd(__A, __I, __B),
+                                     (__v2df)__A);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128(__U,
+                                     (__v2df)_mm_permutex2var_pd(__A, __I, __B),
+                                     (__v2df)(__m128d)__I);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128(__U,
+                                     (__v2df)_mm_permutex2var_pd(__A, __I, __B),
+                                     (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
+  return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
+                                                 (__v4df)__B);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
+                            __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256(__U,
+                                  (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
+                                  (__v4df)__A);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
+                             __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256(__U,
+                                  (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
+                                  (__v4df)(__m256d)__I);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
+                             __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256(__U,
+                                  (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
+                                  (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
+  return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
+                                                (__v4sf)__B);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128(__U,
+                                     (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
+                                     (__v4sf)__A);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128(__U,
+                                     (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
+                                     (__v4sf)(__m128)__I);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128(__U,
+                                     (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
+                                     (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
+  return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
+                                                (__v8sf) __B);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256(__U,
+                                  (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
+                                  (__v8sf)__A);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
+                             __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256(__U,
+                                  (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
+                                  (__v8sf)(__m256)__I);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
+                             __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256(__U,
+                                  (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
+                                  (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
+  return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
+                                                (__v2di)__B);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
+                            __m128i __B) {
+  return (__m128i)__builtin_ia32_selectq_128(__U,
+                                  (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
+                                  (__v2di)__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
+                             __m128i __B) {
+  return (__m128i)__builtin_ia32_selectq_128(__U,
+                                  (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
+                                  (__v2di)__I);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
+                             __m128i __B) {
+  return (__m128i)__builtin_ia32_selectq_128(__U,
+                                  (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
+                                  (__v2di)_mm_setzero_si128());
+}
+
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
+  return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
+                                                (__v4di) __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
+                               __m256i __B) {
+  return (__m256i)__builtin_ia32_selectq_256(__U,
+                               (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
+                               (__v4di)__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
+                                __m256i __B) {
+  return (__m256i)__builtin_ia32_selectq_256(__U,
+                               (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
+                               (__v4di)__I);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
+                                __m256i __B) {
+  return (__m256i)__builtin_ia32_selectq_256(__U,
+                               (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
+                               (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtepi8_epi32(__A),
+                                             (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtepi8_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtepi8_epi32(__A),
+                                             (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtepi8_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepi8_epi64(__A),
+                                             (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepi8_epi64(__A),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepi8_epi64(__A),
+                                             (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepi8_epi64(__A),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepi32_epi64(__X),
+                                             (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepi32_epi64(__X),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepi32_epi64(__X),
+                                             (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepi32_epi64(__X),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtepi16_epi32(__A),
+                                             (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtepi16_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtepi16_epi32(__A),
+                                             (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtepi16_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepi16_epi64(__A),
+                                             (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepi16_epi64(__A),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepi16_epi64(__A),
+                                             (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepi16_epi64(__A),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtepu8_epi32(__A),
+                                             (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtepu8_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtepu8_epi32(__A),
+                                             (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtepu8_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepu8_epi64(__A),
+                                             (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepu8_epi64(__A),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepu8_epi64(__A),
+                                             (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepu8_epi64(__A),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepu32_epi64(__X),
+                                             (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepu32_epi64(__X),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepu32_epi64(__X),
+                                             (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepu32_epi64(__X),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtepu16_epi32(__A),
+                                             (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtepu16_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtepu16_epi32(__A),
+                                             (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtepu16_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepu16_epi64(__A),
+                                             (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
+{
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_cvtepu16_epi64(__A),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepu16_epi64(__A),
+                                             (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
+{
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_cvtepu16_epi64(__A),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+
+#define _mm_rol_epi32(a, b) \
+  (__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))
+
+#define _mm_mask_rol_epi32(w, u, a, b) \
+  (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
+                                      (__v4si)_mm_rol_epi32((a), (b)), \
+                                      (__v4si)(__m128i)(w))
+
+#define _mm_maskz_rol_epi32(u, a, b) \
+  (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
+                                      (__v4si)_mm_rol_epi32((a), (b)), \
+                                      (__v4si)_mm_setzero_si128())
+
+#define _mm256_rol_epi32(a, b) \
+  (__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))
+
+#define _mm256_mask_rol_epi32(w, u, a, b) \
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
+                                      (__v8si)_mm256_rol_epi32((a), (b)), \
+                                      (__v8si)(__m256i)(w))
+
+#define _mm256_maskz_rol_epi32(u, a, b) \
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
+                                      (__v8si)_mm256_rol_epi32((a), (b)), \
+                                      (__v8si)_mm256_setzero_si256())
+
+#define _mm_rol_epi64(a, b) \
+  (__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))
+
+#define _mm_mask_rol_epi64(w, u, a, b) \
+  (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
+                                      (__v2di)_mm_rol_epi64((a), (b)), \
+                                      (__v2di)(__m128i)(w))
+
+#define _mm_maskz_rol_epi64(u, a, b) \
+  (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
+                                      (__v2di)_mm_rol_epi64((a), (b)), \
+                                      (__v2di)_mm_setzero_si128())
+
+#define _mm256_rol_epi64(a, b) \
+  (__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))
+
+#define _mm256_mask_rol_epi64(w, u, a, b) \
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
+                                      (__v4di)_mm256_rol_epi64((a), (b)), \
+                                      (__v4di)(__m256i)(w))
+
+#define _mm256_maskz_rol_epi64(u, a, b) \
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
+                                      (__v4di)_mm256_rol_epi64((a), (b)), \
+                                      (__v4di)_mm256_setzero_si256())
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rolv_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
-                                                  (__v4si) __B,
-                                                  (__v4si)
-                                                  _mm_setzero_si128 (),
-                                                  (__mmask8) -1);
+  return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-                     __m128i __B)
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
-                                                  (__v4si) __B,
-                                                  (__v4si) __W,
-                                                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128(__U,
+                                             (__v4si)_mm_rolv_epi32(__A, __B),
+                                             (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
-                                                  (__v4si) __B,
-                                                  (__v4si)
-                                                  _mm_setzero_si128 (),
-                                                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128(__U,
+                                             (__v4si)_mm_rolv_epi32(__A, __B),
+                                             (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
-                                                  (__v8si) __B,
-                                                  (__v8si)
-                                                  _mm256_setzero_si256 (),
-                                                  (__mmask8) -1);
+  return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-                        __m256i __B)
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
-                                                  (__v8si) __B,
-                                                  (__v8si) __W,
-                                                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256(__U,
+                                             (__v8si)_mm256_rolv_epi32(__A, __B),
+                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
-                                                  (__v8si) __B,
-                                                  (__v8si)
-                                                  _mm256_setzero_si256 (),
-                                                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256(__U,
+                                             (__v8si)_mm256_rolv_epi32(__A, __B),
+                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rolv_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
-                                                  (__v2di) __B,
-                                                  (__v2di)
-                                                  _mm_setzero_di (),
-                                                  (__mmask8) -1);
+  return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-                     __m128i __B)
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
-                                                  (__v2di) __B,
-                                                  (__v2di) __W,
-                                                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectq_128(__U,
+                                             (__v2di)_mm_rolv_epi64(__A, __B),
+                                             (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
-                                                  (__v2di) __B,
-                                                  (__v2di)
-                                                  _mm_setzero_di (),
-                                                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectq_128(__U,
+                                             (__v2di)_mm_rolv_epi64(__A, __B),
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
-                                                  (__v4di) __B,
-                                                  (__v4di)
-                                                  _mm256_setzero_si256 (),
-                                                  (__mmask8) -1);
+  return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-                        __m256i __B)
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
-                                                  (__v4di) __B,
-                                                  (__v4di) __W,
-                                                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectq_256(__U,
+                                             (__v4di)_mm256_rolv_epi64(__A, __B),
+                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
-                                                  (__v4di) __B,
-                                                  (__v4di)
-                                                  _mm256_setzero_si256 (),
-                                                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectq_256(__U,
+                                             (__v4di)_mm256_rolv_epi64(__A, __B),
+                                             (__v4di)_mm256_setzero_si256());
 }
 
-#define _mm_ror_epi32(A, B) __extension__ ({ \
-  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
-                                        (__v4si)_mm_setzero_si128(), \
-                                        (__mmask8)-1); })
+#define _mm_ror_epi32(a, b) \
+  (__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))
 
-#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
-  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
-                                        (__v4si)(__m128i)(W), (__mmask8)(U)); })
+#define _mm_mask_ror_epi32(w, u, a, b) \
+  (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
+                                      (__v4si)_mm_ror_epi32((a), (b)), \
+                                      (__v4si)(__m128i)(w))
 
-#define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
-  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
-                                        (__v4si)_mm_setzero_si128(), \
-                                        (__mmask8)(U)); })
+#define _mm_maskz_ror_epi32(u, a, b) \
+  (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
+                                      (__v4si)_mm_ror_epi32((a), (b)), \
+                                      (__v4si)_mm_setzero_si128())
 
-#define _mm256_ror_epi32(A, B) __extension__ ({ \
-  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
-                                        (__v8si)_mm256_setzero_si256(), \
-                                        (__mmask8)-1); })
+#define _mm256_ror_epi32(a, b) \
+  (__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))
 
-#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
-  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
-                                        (__v8si)(__m256i)(W), (__mmask8)(U)); })
+#define _mm256_mask_ror_epi32(w, u, a, b) \
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
+                                      (__v8si)_mm256_ror_epi32((a), (b)), \
+                                      (__v8si)(__m256i)(w))
 
-#define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
-  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
-                                        (__v8si)_mm256_setzero_si256(), \
-                                        (__mmask8)(U)); })
+#define _mm256_maskz_ror_epi32(u, a, b) \
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
+                                      (__v8si)_mm256_ror_epi32((a), (b)), \
+                                      (__v8si)_mm256_setzero_si256())
 
-#define _mm_ror_epi64(A, B) __extension__ ({ \
-  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
-                                        (__v2di)_mm_setzero_di(), \
-                                        (__mmask8)-1); })
+#define _mm_ror_epi64(a, b) \
+  (__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))
 
-#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
-  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
-                                        (__v2di)(__m128i)(W), (__mmask8)(U)); })
+#define _mm_mask_ror_epi64(w, u, a, b) \
+  (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
+                                      (__v2di)_mm_ror_epi64((a), (b)), \
+                                      (__v2di)(__m128i)(w))
 
-#define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
-  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
-                                        (__v2di)_mm_setzero_di(), \
-                                        (__mmask8)(U)); })
+#define _mm_maskz_ror_epi64(u, a, b) \
+  (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
+                                      (__v2di)_mm_ror_epi64((a), (b)), \
+                                      (__v2di)_mm_setzero_si128())
 
-#define _mm256_ror_epi64(A, B) __extension__ ({ \
-  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
-                                        (__v4di)_mm256_setzero_si256(), \
-                                        (__mmask8)-1); })
+#define _mm256_ror_epi64(a, b) \
+  (__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))
 
-#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
-  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
-                                        (__v4di)(__m256i)(W), (__mmask8)(U)); })
+#define _mm256_mask_ror_epi64(w, u, a, b) \
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
+                                      (__v4di)_mm256_ror_epi64((a), (b)), \
+                                      (__v4di)(__m256i)(w))
 
-#define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
-  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
-                                        (__v4di)_mm256_setzero_si256(), \
-                                        (__mmask8)(U)); })
+#define _mm256_maskz_ror_epi64(u, a, b) \
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
+                                      (__v4di)_mm256_ror_epi64((a), (b)), \
+                                      (__v4di)_mm256_setzero_si256())
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4558,7 +4416,7 @@ _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4566,7 +4424,7 @@ _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4574,7 +4432,7 @@ _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4582,7 +4440,7 @@ _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4590,7 +4448,7 @@ _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4598,7 +4456,7 @@ _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4606,7 +4464,7 @@ _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4614,7 +4472,7 @@ _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4622,15 +4480,15 @@ _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_sll_epi64(__A, __B),
-                                             (__v2di)_mm_setzero_di());
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4638,7 +4496,7 @@ _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4646,7 +4504,7 @@ _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4654,15 +4512,15 @@ _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_slli_epi64(__A, __B),
-                                             (__v2di)_mm_setzero_di());
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4670,7 +4528,7 @@ _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4678,127 +4536,95 @@ _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rorv_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
-                                                  (__v4si) __B,
-                                                  (__v4si)
-                                                  _mm_setzero_si128 (),
-                                                  (__mmask8) -1);
+  return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-                     __m128i __B)
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
-                                                  (__v4si) __B,
-                                                  (__v4si) __W,
-                                                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128(__U,
+                                             (__v4si)_mm_rorv_epi32(__A, __B),
+                                             (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
-                                                  (__v4si) __B,
-                                                  (__v4si)
-                                                  _mm_setzero_si128 (),
-                                                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128(__U,
+                                             (__v4si)_mm_rorv_epi32(__A, __B),
+                                             (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
-                                                  (__v8si) __B,
-                                                  (__v8si)
-                                                  _mm256_setzero_si256 (),
-                                                  (__mmask8) -1);
+  return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-                        __m256i __B)
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
-                                                  (__v8si) __B,
-                                                  (__v8si) __W,
-                                                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256(__U,
+                                             (__v8si)_mm256_rorv_epi32(__A, __B),
+                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
-                                                  (__v8si) __B,
-                                                  (__v8si)
-                                                  _mm256_setzero_si256 (),
-                                                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256(__U,
+                                             (__v8si)_mm256_rorv_epi32(__A, __B),
+                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rorv_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
-                                                  (__v2di) __B,
-                                                  (__v2di)
-                                                  _mm_setzero_di (),
-                                                  (__mmask8) -1);
+  return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-                     __m128i __B)
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
-                                                  (__v2di) __B,
-                                                  (__v2di) __W,
-                                                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectq_128(__U,
+                                             (__v2di)_mm_rorv_epi64(__A, __B),
+                                             (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
-                                                  (__v2di) __B,
-                                                  (__v2di)
-                                                  _mm_setzero_di (),
-                                                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectq_128(__U,
+                                             (__v2di)_mm_rorv_epi64(__A, __B),
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
-                                                  (__v4di) __B,
-                                                  (__v4di)
-                                                  _mm256_setzero_si256 (),
-                                                  (__mmask8) -1);
+  return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-                        __m256i __B)
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
-                                                  (__v4di) __B,
-                                                  (__v4di) __W,
-                                                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectq_256(__U,
+                                             (__v4di)_mm256_rorv_epi64(__A, __B),
+                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
-                                                  (__v4di) __B,
-                                                  (__v4di)
-                                                  _mm256_setzero_si256 (),
-                                                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectq_256(__U,
+                                             (__v4di)_mm256_rorv_epi64(__A, __B),
+                                             (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4806,15 +4632,15 @@ _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_sllv_epi64(__X, __Y),
-                                             (__v2di)_mm_setzero_di());
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4822,7 +4648,7 @@ _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4830,7 +4656,7 @@ _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4838,7 +4664,7 @@ _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4846,7 +4672,7 @@ _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4854,7 +4680,7 @@ _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4862,7 +4688,7 @@ _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4870,15 +4696,15 @@ _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_srlv_epi64(__X, __Y),
-                                             (__v2di)_mm_setzero_di());
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4886,7 +4712,7 @@ _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4894,7 +4720,7 @@ _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4902,7 +4728,7 @@ _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4910,7 +4736,7 @@ _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4918,7 +4744,7 @@ _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4926,7 +4752,7 @@ _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4934,7 +4760,7 @@ _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4942,7 +4768,7 @@ _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4950,7 +4776,7 @@ _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4958,7 +4784,7 @@ _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4966,7 +4792,7 @@ _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4974,7 +4800,7 @@ _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4982,7 +4808,7 @@ _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4990,7 +4816,7 @@ _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4998,15 +4824,15 @@ _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_srl_epi64(__A, __B),
-                                             (__v2di)_mm_setzero_di());
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -5014,7 +4840,7 @@ _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -5022,7 +4848,7 @@ _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -5030,15 +4856,15 @@ _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_srli_epi64(__A, __B),
-                                             (__v2di)_mm_setzero_di());
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -5046,7 +4872,7 @@ _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -5054,7 +4880,7 @@ _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -5062,7 +4888,7 @@ _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -5070,7 +4896,7 @@ _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -5078,7 +4904,7 @@ _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -5086,13 +4912,13 @@ _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_srav_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -5100,21 +4926,21 @@ _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_srav_epi64(__X, __Y),
-                                             (__v2di)_mm_setzero_di());
+                                             (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srav_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -5122,7 +4948,7 @@ _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -5130,7 +4956,7 @@ _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
@@ -5138,7 +4964,7 @@ _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
                                                (__v4si) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
@@ -5147,7 +4973,7 @@ _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
 }
 
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
@@ -5155,7 +4981,7 @@ _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
                                                (__v8si) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
@@ -5163,7 +4989,7 @@ _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
                                                (__v8si) _mm256_setzero_si256 ());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
@@ -5172,7 +4998,7 @@ _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
                                                         __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
@@ -5182,7 +5008,7 @@
_mm_maskz_load_epi32 (__mmask8 __U, void const *__P) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, @@ -5191,7 +5017,7 @@ _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, @@ -5201,7 +5027,7 @@ _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, @@ -5209,7 +5035,7 @@ _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, @@ -5217,7 +5043,7 @@ _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, @@ -5225,15 +5051,15 @@ _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) (__v2di) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __A, - (__v2di) _mm_setzero_di ()); + (__v2di) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, @@ -5241,7 +5067,7 @@ _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) (__v4di) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, @@ -5249,7 +5075,7 @@ _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) (__v4di) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, @@ -5258,17 +5084,17 @@ _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, @@ -5277,7 +5103,7 @@ _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) __U); } -static __inline__ __m256i 
__DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, @@ -5287,7 +5113,7 @@ _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, @@ -5295,7 +5121,7 @@ _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, @@ -5303,7 +5129,7 @@ _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5311,7 +5137,7 @@ _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5319,7 +5145,7 @@ _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5327,7 +5153,7 @@ _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5335,7 +5161,7 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -5343,7 +5169,7 @@ _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32( __mmask8 __M, int __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -5351,7 +5177,7 @@ _mm_maskz_set1_epi32( __mmask8 __M, int __A) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -5359,7 +5185,7 @@ _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -5368,8 +5194,7 @@ _mm256_maskz_set1_epi32( __mmask8 __M, int __A) } -#ifdef __x86_64__ -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, @@ -5377,7 +5202,7 
@@ _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, @@ -5385,7 +5210,7 @@ _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, @@ -5393,89 +5218,87 @@ _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) (__v4di) __O) ; } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, (__v4di) _mm256_set1_epi64x(__A), (__v4di) _mm256_setzero_si256()); } - -#endif -#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \ +#define _mm_fixupimm_pd(A, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), \ - (int)(imm), (__mmask8)(U)); }) + (int)(imm), (__mmask8)(U)) -#define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \ +#define _mm256_fixupimm_pd(A, B, C, imm) \ (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ +#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ +#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)(U)); }) + (int)(imm), (__mmask8)(U)) -#define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \ +#define _mm_fixupimm_ps(A, B, C, imm) \ (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 
+#define _mm256_fixupimm_ps(A, B, C, imm) \ (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ +#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ +#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, @@ -5483,7 +5306,7 @@ _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, @@ -5492,7 +5315,7 @@ _mm_maskz_load_pd (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, @@ -5500,7 +5323,7 @@ _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, @@ -5509,7 +5332,7 @@ _mm256_maskz_load_pd (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, @@ -5517,7 +5340,7 @@ _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, @@ -5526,7 +5349,7 @@ _mm_maskz_load_ps (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, @@ -5534,7 +5357,7 @@ _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, @@ -5543,7 +5366,7 @@ _mm256_maskz_load_ps (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, @@ -5551,7 +5374,7 @@ 
_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, @@ -5560,7 +5383,7 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, @@ -5568,7 +5391,7 @@ _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, @@ -5577,7 +5400,7 @@ _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, @@ -5585,7 +5408,7 @@ _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, @@ -5594,7 +5417,7 @@ _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, @@ -5602,7 +5425,7 @@ _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, @@ -5611,7 +5434,7 @@ _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, @@ -5619,7 +5442,7 @@ _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, @@ -5628,7 +5451,7 @@ _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, @@ -5636,7 +5459,7 @@ _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) 
__P, @@ -5645,7 +5468,7 @@ _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, @@ -5653,7 +5476,7 @@ _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, @@ -5662,7 +5485,7 @@ _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, @@ -5670,7 +5493,7 @@ _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, @@ -5679,7 +5502,7 @@ _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeapd128_mask ((__v2df *) __P, @@ -5687,7 +5510,7 @@ _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeapd256_mask ((__v4df *) __P, @@ -5695,7 +5518,7 @@ _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeaps128_mask ((__v4sf *) __P, @@ -5703,7 +5526,7 @@ _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeaps256_mask ((__v8sf *) __P, @@ -5711,7 +5534,7 @@ _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqudi128_mask ((__v2di *) __P, @@ -5719,7 +5542,7 @@ _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqudi256_mask ((__v4di *) __P, @@ -5727,7 +5550,7 @@ _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqusi128_mask ((__v4si *) __P, @@ -5735,7 +5558,7 @@ _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void 
__DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqusi256_mask ((__v8si *) __P, @@ -5743,7 +5566,7 @@ _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeupd128_mask ((__v2df *) __P, @@ -5751,7 +5574,7 @@ _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeupd256_mask ((__v4df *) __P, @@ -5759,7 +5582,7 @@ _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeups128_mask ((__v4sf *) __P, @@ -5767,7 +5590,7 @@ _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeups256_mask ((__v8sf *) __P, @@ -5776,7 +5599,7 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5784,7 +5607,7 @@ _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5792,7 +5615,7 @@ _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5800,7 +5623,7 @@ _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5808,7 +5631,7 @@ _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5816,7 +5639,7 @@ _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5824,7 +5647,7 @@ _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 
_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5832,7 +5655,7 @@ _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5840,7 +5663,7 @@ _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5848,7 +5671,7 @@ _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5856,7 +5679,7 @@ _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5864,7 +5687,7 @@ _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5872,7 +5695,7 @@ _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5880,7 +5703,7 @@ _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5888,7 +5711,7 @@ _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5896,7 +5719,7 @@ _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5904,7 +5727,7 @@ _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd (__m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5913,7 +5736,7 @@ _mm_rcp14_pd (__m128d __A) (__mmask8) -1); } -static __inline__ __m128d 
__DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5921,7 +5744,7 @@ _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5930,7 +5753,7 @@ _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd (__m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5939,7 +5762,7 @@ _mm256_rcp14_pd (__m256d __A) (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5947,7 +5770,7 @@ _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5956,7 +5779,7 @@ _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps (__m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5965,7 +5788,7 @@ _mm_rcp14_ps (__m128 __A) (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5973,7 +5796,7 @@ _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5982,7 +5805,7 @@ _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps (__m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -5991,7 +5814,7 @@ _mm256_rcp14_ps (__m256 __A) (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -5999,7 +5822,7 @@ _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -6008,47 +5831,47 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) (__mmask8) __U); } -#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \ +#define _mm_mask_permute_pd(W, U, X, C) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_permute_pd((X), (C)), \ - (__v2df)(__m128d)(W)); }) + (__v2df)(__m128d)(W)) -#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \ +#define _mm_maskz_permute_pd(U, X, C) \ 
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_permute_pd((X), (C)), \ - (__v2df)_mm_setzero_pd()); }) + (__v2df)_mm_setzero_pd()) -#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \ +#define _mm256_mask_permute_pd(W, U, X, C) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permute_pd((X), (C)), \ - (__v4df)(__m256d)(W)); }) + (__v4df)(__m256d)(W)) -#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \ +#define _mm256_maskz_permute_pd(U, X, C) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permute_pd((X), (C)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \ +#define _mm_mask_permute_ps(W, U, X, C) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_permute_ps((X), (C)), \ - (__v4sf)(__m128)(W)); }) + (__v4sf)(__m128)(W)) -#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \ +#define _mm_maskz_permute_ps(U, X, C) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_permute_ps((X), (C)), \ - (__v4sf)_mm_setzero_ps()); }) + (__v4sf)_mm_setzero_ps()) -#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \ +#define _mm256_mask_permute_ps(W, U, X, C) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_permute_ps((X), (C)), \ - (__v8sf)(__m256)(W)); }) + (__v8sf)(__m256)(W)) -#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \ +#define _mm256_maskz_permute_ps(U, X, C) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_permute_ps((X), (C)), \ - (__v8sf)_mm256_setzero_ps()); }) + (__v8sf)_mm256_setzero_ps()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -6056,7 +5879,7 @@ _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -6064,7 +5887,7 @@ _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -6072,7 +5895,7 @@ _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -6080,7 +5903,7 @@ _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -6088,7 +5911,7 @@ _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -6096,7 +5919,7 @@ _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -6104,7 +5927,7 @@ _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -6112,115 +5935,115 @@ _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) (__v8sf)_mm256_setzero_ps()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask (__m128i __A, __m128i __B) { - return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di()); + return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_di()); + _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask (__m128i __A, __m128i __B) { - return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di()); + return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_di()); + _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask (__m128i __A, __m128i __B) { - return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di()); + return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 
(__A, __B), - _mm_setzero_di()); + _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask (__m128i __A, __m128i __B) { - return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di()); + return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_di()); + _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6228,7 +6051,7 @@ _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6236,7 +6059,7 @@ _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6244,7 +6067,7 @@ _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6252,7 +6075,7 @@ _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6260,15 +6083,15 @@ _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { 
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6276,7 +6099,7 @@ _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6284,7 +6107,7 @@ _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6292,7 +6115,7 @@ _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6300,7 +6123,7 @@ _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6308,7 +6131,7 @@ _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6316,7 +6139,7 @@ _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6324,15 +6147,15 @@ _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6340,7 +6163,7 @@ _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6348,7 +6171,7 @@ _mm256_maskz_unpacklo_epi64(__mmask8 
__U, __m256i __A, __m256i __B) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6356,7 +6179,7 @@ _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6364,7 +6187,7 @@ _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6372,7 +6195,7 @@ _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6380,7 +6203,7 @@ _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6388,7 +6211,7 @@ _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6396,7 +6219,7 @@ _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6404,7 +6227,7 @@ _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6412,13 +6235,13 @@ _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6426,21 +6249,21 @@ _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_sra_epi64(__A, __B), \ - 
(__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6448,7 +6271,7 @@ _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6456,13 +6279,13 @@ _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srai_epi64(__m128i __A, int __imm) { return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6470,21 +6293,21 @@ _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_srai_epi64(__A, __imm), \ - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi64(__m256i __A, int __imm) { return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6492,7 +6315,7 @@ _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6500,198 +6323,178 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) (__v4di)_mm256_setzero_si256()); } -#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ +#define _mm_ternarylogic_epi32(A, B, C, imm) \ (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + 
(__mmask8)(U)) -#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ +#define _mm256_ternarylogic_epi32(A, B, C, imm) \ (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ +#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ +#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ +#define _mm_ternarylogic_epi64(A, B, C, imm) \ (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ +#define _mm256_ternarylogic_epi64(A, B, C, imm) \ (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ +#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ +#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), \ - 0 + ((((imm) >> 0) & 0x1) * 4), \ - 1 + ((((imm) >> 0) & 0x1) * 4), \ - 2 + ((((imm) >> 0) & 0x1) * 4), \ - 3 + ((((imm) >> 0) & 0x1) * 4), \ - 8 + ((((imm) >> 1) & 0x1) * 4), \ - 9 + ((((imm) >> 1) & 0x1) * 4), \ - 10 + ((((imm) >> 1) & 0x1) * 4), \ - 11 + ((((imm) >> 1) & 0x1) * 4)); }) +#define _mm256_shuffle_f32x4(A, B, imm) \ + (__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(imm)) -#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ - (__v8sf)(__m256)(W)); }) + (__v8sf)(__m256)(W)) -#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) 
__extension__ ({ \ +#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ - (__v8sf)_mm256_setzero_ps()); }) + (__v8sf)_mm256_setzero_ps()) -#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), \ - 0 + ((((imm) >> 0) & 0x1) * 2), \ - 1 + ((((imm) >> 0) & 0x1) * 2), \ - 4 + ((((imm) >> 1) & 0x1) * 2), \ - 5 + ((((imm) >> 1) & 0x1) * 2)); }) +#define _mm256_shuffle_f64x2(A, B, imm) \ + (__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(imm)) -#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ - (__v4df)(__m256)(W)); }) + (__v4df)(__m256d)(W)) -#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - 0 + ((((imm) >> 0) & 0x1) * 2), \ - 1 + ((((imm) >> 0) & 0x1) * 2), \ - 4 + ((((imm) >> 1) & 0x1) * 2), \ - 5 + ((((imm) >> 1) & 0x1) * 2)); }) +#define _mm256_shuffle_i32x4(A, B, imm) \ + (__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(imm)) -#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ - (__v8si)(__m256)(W)); }) + (__v8si)(__m256i)(W)) -#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - 0 + ((((imm) >> 0) & 0x1) * 2), \ - 1 + ((((imm) >> 0) & 0x1) * 2), \ - 4 + ((((imm) >> 1) & 0x1) * 2), \ - 5 + ((((imm) >> 1) & 0x1) * 2)); }) +#define _mm256_shuffle_i64x2(A, B, imm) \ + (__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(imm)) -#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ - (__v4di)(__m256)(W)); }) + (__v4di)(__m256i)(W)) -#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ - (__v4di)_mm256_setzero_si256()); }) + (__v4di)_mm256_setzero_si256()) -#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ +#define _mm_mask_shuffle_pd(W, U, A, B, M) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_shuffle_pd((A), (B), (M)), \ - (__v2df)(__m128d)(W)); }) + (__v2df)(__m128d)(W)) -#define _mm_maskz_shuffle_pd(U, A, B, 
M) __extension__ ({ \ +#define _mm_maskz_shuffle_pd(U, A, B, M) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_shuffle_pd((A), (B), (M)), \ - (__v2df)_mm_setzero_pd()); }) + (__v2df)_mm_setzero_pd()) -#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ +#define _mm256_mask_shuffle_pd(W, U, A, B, M) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ - (__v4df)(__m256d)(W)); }) + (__v4df)(__m256d)(W)) -#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ +#define _mm256_maskz_shuffle_pd(U, A, B, M) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ +#define _mm_mask_shuffle_ps(W, U, A, B, M) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ - (__v4sf)(__m128)(W)); }) + (__v4sf)(__m128)(W)) -#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ +#define _mm_maskz_shuffle_ps(U, A, B, M) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ - (__v4sf)_mm_setzero_ps()); }) + (__v4sf)_mm_setzero_ps()) -#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ +#define _mm256_mask_shuffle_ps(W, U, A, B, M) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ - (__v8sf)(__m256)(W)); }) + (__v8sf)(__m256)(W)) -#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ +#define _mm256_maskz_shuffle_ps(U, A, B, M) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ - (__v8sf)_mm256_setzero_ps()); }) + (__v8sf)_mm256_setzero_ps()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd (__m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6700,7 +6503,7 @@ _mm_rsqrt14_pd (__m128d __A) (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6708,7 +6511,7 @@ _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6717,7 +6520,7 @@ _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd (__m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6726,7 +6529,7 @@ _mm256_rsqrt14_pd (__m256d __A) (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6734,7 +6537,7 @@ _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6743,7 +6546,7 @@ _mm256_maskz_rsqrt14_pd 
(__mmask8 __U, __m256d __A) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps (__m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6752,7 +6555,7 @@ _mm_rsqrt14_ps (__m128 __A) (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6760,7 +6563,7 @@ _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6769,7 +6572,7 @@ _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps (__m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6778,7 +6581,7 @@ _mm256_rsqrt14_ps (__m256 __A) (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6786,7 +6589,7 @@ _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6795,14 +6598,14 @@ _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x4(__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -6810,7 +6613,7 @@ _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -6818,14 +6621,14 @@ _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x4(__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -6833,7 +6636,7 @@ _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -6841,7 +6644,7 @@ _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) (__v8si)_mm256_setzero_si256()); } 
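
The hunks above rewrite these helpers from GNU statement-expression macros (`__extension__ ({ ... })`) to plain expressions, and implement merge and zero masking by wrapping the unmasked operation in `__builtin_ia32_select*`: each mask bit picks either the computed element or, for the unselected lanes, the corresponding element of `W` (merge form) or zero (`maskz` form). A minimal sketch of that behavior at the public API level, assuming an AVX512F+AVX512VL toolchain and CPU (e.g. `clang -O2 -mavx512f -mavx512vl`); the values, mask, and driver are illustrative and not part of the patch:

```c
/* Editorial sketch, not part of the patch: merge- vs. zero-masking as the
 * new select-based header macros expose it.  Requires AVX512F+AVX512VL. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 src = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f); /* lanes 0..3 = 1,2,3,4 */
    __m256 old = _mm256_set1_ps(-1.0f);

    /* Unmasked: both 128-bit halves become {1,2,3,4}. */
    __m256 a = _mm256_broadcast_f32x4(src);
    /* Merge-masked: mask 0x0F takes the broadcast result in lanes 0-3 and
     * keeps `old` in lanes 4-7. */
    __m256 b = _mm256_mask_broadcast_f32x4(old, 0x0F, src);
    /* Zero-masked: unselected lanes become 0.0f instead of keeping `old`. */
    __m256 c = _mm256_maskz_broadcast_f32x4(0xF0, src);

    float out[8];
    _mm256_storeu_ps(out, b);
    for (int i = 0; i < 8; ++i)
        printf("%g ", out[i]);        /* prints: 1 2 3 4 -1 -1 -1 -1 */
    printf("\n");
    (void)a; (void)c;
    return 0;
}
```

Expressing the masked forms as a select over the unmasked intrinsic lets the compiler fold the blend away when the mask is a known constant, which appears to be the motivation for dropping the dedicated `*_mask` builtins in these hunks.
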
-static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, @@ -6849,7 +6652,7 @@ _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) (__v4df) __O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, @@ -6857,7 +6660,7 @@ _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) (__v4df) _mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, @@ -6865,7 +6668,7 @@ _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) (__v4sf) __O); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, @@ -6873,7 +6676,7 @@ _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) (__v4sf) _mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, @@ -6881,7 +6684,7 @@ _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) (__v8sf) __O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, @@ -6889,7 +6692,7 @@ _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) (__v8sf) _mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -6897,7 +6700,7 @@ _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) (__v4si) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -6905,7 +6708,7 @@ _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) (__v4si) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -6913,7 +6716,7 @@ _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) (__v8si) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -6921,7 +6724,7 @@ _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) (__v8si) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -6929,7 +6732,7 @@ _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -6937,7 +6740,7 @@ _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -6945,7 +6748,7 @@ _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) (__v4di) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -6953,7 +6756,7 @@ _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) (__v4di) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, @@ -6961,14 +6764,14 @@ _mm_cvtsepi32_epi8 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, @@ -6976,13 +6779,13 @@ _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm256_cvtsepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, @@ -6990,14 +6793,14 @@ _mm256_cvtsepi32_epi8 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, @@ -7005,13 +6808,13 @@ _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -7019,7 +6822,7 @@ _mm_cvtsepi32_epi16 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -7027,7 +6830,7 @@ _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i 
__A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -7035,13 +6838,13 @@ _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, @@ -7049,14 +6852,14 @@ _mm256_cvtsepi32_epi16 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, @@ -7064,13 +6867,13 @@ _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, @@ -7078,14 +6881,14 @@ _mm_cvtsepi64_epi8 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, @@ -7093,13 +6896,13 @@ _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, @@ -7107,14 +6910,14 @@ _mm256_cvtsepi64_epi8 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, @@ -7122,13 +6925,13 @@ _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static 
__inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, @@ -7136,14 +6939,14 @@ _mm_cvtsepi64_epi32 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, @@ -7151,13 +6954,13 @@ _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -7165,7 +6968,7 @@ _mm256_cvtsepi64_epi32 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -7173,7 +6976,7 @@ _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -7181,13 +6984,13 @@ _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, @@ -7195,14 +6998,14 @@ _mm_cvtsepi64_epi16 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, @@ -7210,13 +7013,13 @@ _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 
_mm256_cvtsepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, @@ -7224,14 +7027,14 @@ _mm256_cvtsepi64_epi16 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, @@ -7239,13 +7042,13 @@ _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7253,7 +7056,7 @@ _mm_cvtusepi32_epi8 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7261,7 +7064,7 @@ _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7269,13 +7072,13 @@ _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7283,7 +7086,7 @@ _mm256_cvtusepi32_epi8 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7291,7 +7094,7 @@ _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7299,13 +7102,13 @@ _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, @@ -7313,14 +7116,14 @@ _mm_cvtusepi32_epi16 (__m128i __A) (__mmask8) -1); } -static __inline__ 
__m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, @@ -7328,13 +7131,13 @@ _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, @@ -7342,14 +7145,14 @@ _mm256_cvtusepi32_epi16 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, @@ -7357,13 +7160,13 @@ _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7371,7 +7174,7 @@ _mm_cvtusepi64_epi8 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7379,7 +7182,7 @@ _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7387,13 +7190,13 @@ _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7401,7 +7204,7 @@ _mm256_cvtusepi64_epi8 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7409,7 +7212,7 @@ _mm256_mask_cvtusepi64_epi8 (__m128i __O, 
__mmask8 __M, __m256i __A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7417,13 +7220,13 @@ _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, @@ -7431,14 +7234,14 @@ _mm_cvtusepi64_epi32 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, @@ -7446,13 +7249,13 @@ _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, @@ -7460,14 +7263,14 @@ _mm256_cvtusepi64_epi32 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, @@ -7475,13 +7278,13 @@ _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, @@ -7489,14 +7292,14 @@ _mm_cvtusepi64_epi16 (__m128i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, @@ -7504,13 +7307,13 @@ _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) __M); } 
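
The `pmovs*` and `pmovus*` down-converts in the surrounding hunks differ only in the saturation rule: the signed forms clamp to the signed range of the destination element, while the `us` forms read the source as unsigned and clamp to the unsigned range, so negative inputs saturate high rather than to zero. A short illustration under the same toolchain assumptions (inputs chosen for the example, not taken from the patch):

```c
/* Editorial sketch, not part of the patch: signed vs. unsigned saturation
 * in the VPMOVSDB/VPMOVUSDB narrowing intrinsics above. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* Elements, lane 0 first: -200, 90, 128, 300. */
    __m128i v = _mm_set_epi32(300, 128, 90, -200);

    /* VPMOVSDB: signed saturation to [-128, 127] -> -128, 90, 127, 127. */
    __m128i s = _mm_cvtsepi32_epi8(v);
    /* VPMOVUSDB: source read as unsigned, saturated to [0, 255];
     * -200 is 0xFFFFFF38 as unsigned, so it clamps to 255 -> 255, 90, 128, 255. */
    __m128i u = _mm_cvtusepi32_epi8(v);

    signed char   bs[16];
    unsigned char bu[16];
    _mm_storeu_si128((__m128i *)bs, s);
    _mm_storeu_si128((__m128i *)bu, u);
    for (int i = 0; i < 4; ++i)
        printf("lane %d: signed %4d  unsigned %3u\n", i, bs[i], bu[i]);
    return 0;
}
```
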
-static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, @@ -7518,14 +7321,14 @@ _mm256_cvtusepi64_epi16 (__m256i __A) (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, @@ -7533,28 +7336,28 @@ _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { - return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); + __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, - (__v16qi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, @@ -7563,28 +7366,29 @@ _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, - (__v16qi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v8si)__A, __v8qi), + (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, @@ -7592,28 +7396,28 @@ _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, 
__m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, - (__v8hi) _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, @@ -7621,28 +7425,26 @@ _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, - (__v8hi)_mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, @@ -7650,28 +7452,28 @@ _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, - (__v16qi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, @@ -7679,28 +7481,28 @@ _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void 
__DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, - (__v16qi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, @@ -7708,28 +7510,27 @@ _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, - (__v4si)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, @@ -7737,50 +7538,49 @@ _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, - (__v4si) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { - return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, - (__v4si) __O, __M); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, + (__v4si)_mm256_cvtepi64_epi32(__A), + (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) { - return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, - (__v4si) _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, + (__v4si)_mm256_cvtepi64_epi32(__A), + 
(__v4si)_mm_setzero_si128()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, - (__v8hi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, + 3, 3, 3, 3); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, @@ -7788,7 +7588,7 @@ _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, @@ -7796,28 +7596,28 @@ _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, - (__v8hi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, @@ -7825,351 +7625,319 @@ _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \ - (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_undefined_ps(), \ - ((imm) & 1) ? 4 : 0, \ - ((imm) & 1) ? 5 : 1, \ - ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3); }) - -#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \ - (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ - (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ - (__v4sf)(W)); }) - -#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \ - (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ - (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ - (__v4sf)_mm_setzero_ps()); }) - -#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \ - (__v8si)_mm256_undefined_si256(), \ - ((imm) & 1) ? 4 : 0, \ - ((imm) & 1) ? 
5 : 1, \ - ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3); }) - -#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ - (__v4si)(W)); }) - -#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ - (__v4si)_mm_setzero_si128()); }) - -#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \ - (__m256)__builtin_shufflevector((__v8sf)(A), \ - (__v8sf)_mm256_castps128_ps256((__m128)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7); }) - -#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_extractf32x4_ps(A, imm) \ + (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ + (int)(imm), \ + (__v4sf)_mm_undefined_ps(), \ + (__mmask8)-1) + +#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ + (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ + (int)(imm), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U)) + +#define _mm256_maskz_extractf32x4_ps(U, A, imm) \ + (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ + (int)(imm), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U)) + +#define _mm256_extracti32x4_epi32(A, imm) \ + (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ + (int)(imm), \ + (__v4si)_mm_undefined_si128(), \ + (__mmask8)-1) + +#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ + (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ + (int)(imm), \ + (__v4si)(__m128i)(W), \ + (__mmask8)(U)) + +#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ + (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ + (int)(imm), \ + (__v4si)_mm_setzero_si128(), \ + (__mmask8)(U)) + +#define _mm256_insertf32x4(A, B, imm) \ + (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ + (__v4sf)(__m128)(B), (int)(imm)) + +#define _mm256_mask_insertf32x4(W, U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ - (__v8sf)(W)); }) + (__v8sf)(__m256)(W)) -#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_insertf32x4(U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ - (__v8sf)_mm256_setzero_ps()); }) - -#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v8si)(A), \ - (__v8si)_mm256_castsi128_si256((__m128i)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 
11 : 7); }) - -#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ + (__v8sf)_mm256_setzero_ps()) + +#define _mm256_inserti32x4(A, B, imm) \ + (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ + (__v4si)(__m128i)(B), (int)(imm)) + +#define _mm256_mask_inserti32x4(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ - (__v8si)(W)); }) + (__v8si)(__m256i)(W)) -#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_inserti32x4(U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm_getmant_pd(A, B, C) __extension__({\ +#define _mm_getmant_pd(A, B, C) \ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\ +#define _mm_mask_getmant_pd(W, U, A, B, C) \ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\ +#define _mm_maskz_getmant_pd(U, A, B, C) \ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_getmant_pd(A, B, C) __extension__ ({ \ +#define _mm256_getmant_pd(A, B, C) \ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm256_mask_getmant_pd(W, U, A, B, C) \ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ +#define _mm256_maskz_getmant_pd(U, A, B, C) \ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_getmant_ps(A, B, C) __extension__ ({ \ +#define _mm_getmant_ps(A, B, C) \ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_getmant_ps(W, U, A, B, C) \ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_getmant_ps(U, A, B, C) \ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_getmant_ps(A, B, C) __extension__ ({ \ +#define _mm256_getmant_ps(A, B, C) \ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ (int)(((C)<<2) | (B)), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm256_mask_getmant_ps(W, U, A, B, C) \ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ (int)(((C)<<2) | (B)), \ (__v8sf)(__m256)(W), \ - (__mmask8)(U)); }) + 
(__mmask8)(U)) -#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ +#define _mm256_maskz_getmant_ps(U, A, B, C) \ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ (int)(((C)<<2) | (B)), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ (double const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ (long long const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ (double const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ (long long const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ (float const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ (int const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ (float const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ (int const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ (double const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define 
_mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ (long long const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ (double const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ (long long const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ (float const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ (int const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ (float const *)(addr), \ (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ (int const *)(addr), \ (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_permutex_pd(X, C) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ - (__v4df)_mm256_undefined_pd(), \ - ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ - ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) +#define _mm256_permutex_pd(X, C) \ + (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)) -#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ +#define _mm256_mask_permutex_pd(W, U, X, C) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permutex_pd((X), (C)), \ - (__v4df)(__m256d)(W)); }) + (__v4df)(__m256d)(W)) -#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \ +#define _mm256_maskz_permutex_pd(U, X, C) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permutex_pd((X), (C)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm256_permutex_epi64(X, C) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ - (__v4di)_mm256_undefined_si256(), \ - ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ - ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) +#define _mm256_permutex_epi64(X, C) \ + (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), 
(int)(C)) -#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ +#define _mm256_mask_permutex_epi64(W, U, X, C) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_permutex_epi64((X), (C)), \ - (__v4di)(__m256i)(W)); }) + (__v4di)(__m256i)(W)) -#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \ +#define _mm256_maskz_permutex_epi64(U, X, C) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_permutex_epi64((X), (C)), \ - (__v4di)_mm256_setzero_si256()); }) + (__v4di)_mm256_setzero_si256()) -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, - (__v4df) _mm256_undefined_si256 (), - (__mmask8) -1); + return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, - (__v4df) __W, - (__mmask8) __U); + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_permutexvar_pd(__X, __Y), + (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, - (__v4df) _mm256_setzero_pd (), - (__mmask8) __U); + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_permutexvar_pd(__X, __Y), + (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, - (__v4di) __X, - (__v4di) _mm256_setzero_si256 (), - (__mmask8) __M); + return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, - (__v4di) __X, - (__v4di) _mm256_undefined_si256 (), - (__mmask8) -1); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_permutexvar_epi64(__X, __Y), + (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, - (__v4di) __X, - (__v4di) __W, - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_permutexvar_epi64(__X, __Y), + (__v4di)__W); } #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8177,7 +7945,7 @@ _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) (__v8sf)__W); } 
-static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8187,7 +7955,7 @@ _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { @@ -8196,7 +7964,7 @@ _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -8204,81 +7972,63 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v8si)_mm256_setzero_si256()); } -#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \ - (__v4si)(__m128i)(A), \ - ((int)(imm) & 0x3) + 0, \ - ((int)(imm) & 0x3) + 1, \ - ((int)(imm) & 0x3) + 2, \ - ((int)(imm) & 0x3) + 3); }) +#define _mm_alignr_epi32(A, B, imm) \ + (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(imm)) -#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ +#define _mm_mask_alignr_epi32(W, U, A, B, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ - (__v4si)(__m128i)(W)); }) + (__v4si)(__m128i)(W)) -#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ +#define _mm_maskz_alignr_epi32(U, A, B, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ - (__v4si)_mm_setzero_si128()); }) - -#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \ - (__v8si)(__m256i)(A), \ - ((int)(imm) & 0x7) + 0, \ - ((int)(imm) & 0x7) + 1, \ - ((int)(imm) & 0x7) + 2, \ - ((int)(imm) & 0x7) + 3, \ - ((int)(imm) & 0x7) + 4, \ - ((int)(imm) & 0x7) + 5, \ - ((int)(imm) & 0x7) + 6, \ - ((int)(imm) & 0x7) + 7); }) - -#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ + (__v4si)_mm_setzero_si128()) + +#define _mm256_alignr_epi32(A, B, imm) \ + (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(imm)) + +#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ - (__v8si)(__m256i)(W)); }) + (__v8si)(__m256i)(W)) -#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_alignr_epi32(U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \ - (__v2di)(__m128i)(A), \ - ((int)(imm) & 0x1) + 0, \ - ((int)(imm) & 0x1) + 1); }) +#define _mm_alignr_epi64(A, B, imm) \ + (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(imm)) -#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ +#define _mm_mask_alignr_epi64(W, U, A, B, imm) \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 
(__v2di)_mm_alignr_epi64((A), (B), (imm)), \ - (__v2di)(__m128i)(W)); }) + (__v2di)(__m128i)(W)) -#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ +#define _mm_maskz_alignr_epi64(U, A, B, imm) \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ - (__v2di)_mm_setzero_di()); }) + (__v2di)_mm_setzero_si128()) -#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \ - (__v4di)(__m256i)(A), \ - ((int)(imm) & 0x3) + 0, \ - ((int)(imm) & 0x3) + 1, \ - ((int)(imm) & 0x3) + 2, \ - ((int)(imm) & 0x3) + 3); }) +#define _mm256_alignr_epi64(A, B, imm) \ + (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(imm)) -#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ - (__v4di)(__m256i)(W)); }) + (__v4di)(__m256i)(W)) -#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_alignr_epi64(U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ - (__v4di)_mm256_setzero_si256()); }) + (__v4di)_mm256_setzero_si256()) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8286,7 +8036,7 @@ _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8294,7 +8044,7 @@ _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8302,7 +8052,7 @@ _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8310,7 +8060,7 @@ _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8318,7 +8068,7 @@ _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8326,7 +8076,7 @@ _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8334,7 +8084,7 @@ _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS 
+static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8342,27 +8092,27 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) (__v8sf)_mm256_setzero_ps()); } -#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ +#define _mm256_mask_shuffle_epi32(W, U, A, I) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_epi32((A), (I)), \ - (__v8si)(__m256i)(W)); }) + (__v8si)(__m256i)(W)) -#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ +#define _mm256_maskz_shuffle_epi32(U, A, I) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_epi32((A), (I)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ +#define _mm_mask_shuffle_epi32(W, U, A, I) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shuffle_epi32((A), (I)), \ - (__v4si)(__m128i)(W)); }) + (__v4si)(__m128i)(W)) -#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ +#define _mm_maskz_shuffle_epi32(U, A, I) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shuffle_epi32((A), (I)), \ - (__v4si)_mm_setzero_si128()); }) + (__v4si)_mm_setzero_si128()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, @@ -8370,7 +8120,7 @@ _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) (__v2df) __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, @@ -8378,7 +8128,7 @@ _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) (__v2df) _mm_setzero_pd ()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, @@ -8386,7 +8136,7 @@ _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) (__v4df) __W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, @@ -8394,7 +8144,7 @@ _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) (__v4df) _mm256_setzero_pd ()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, @@ -8402,7 +8152,7 @@ _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) (__v4sf) __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, @@ -8410,7 +8160,7 @@ _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) (__v4sf) _mm_setzero_ps ()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, @@ -8418,7 +8168,7 @@ _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) (__v8sf) __W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 
__DEFAULT_FN_ATTRS256 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, @@ -8426,7 +8176,7 @@ _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) (__v8sf) _mm256_setzero_ps ()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, @@ -8434,7 +8184,7 @@ _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, @@ -8443,7 +8193,7 @@ _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, @@ -8451,7 +8201,7 @@ _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, @@ -8460,7 +8210,7 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8468,7 +8218,7 @@ _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8476,17 +8226,17 @@ _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) (__mmask8) __U); } -#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ +#define _mm_mask_cvt_roundps_ph(W, U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ +#define _mm_maskz_cvt_roundps_ph(U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) { return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8494,24 +8244,25 @@ _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) { return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, (__v8hi) _mm_setzero_si128(), (__mmask8) __U); } -#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ +#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ 
({ \ +#define _mm256_maskz_cvt_roundps_ph(U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLINTRIN_H */ diff --git a/lib/clang/8.0.0/include/avx512vlvbmi2intrin.h b/lib/clang/8.0.0/include/avx512vlvbmi2intrin.h new file mode 100644 index 0000000..baaf565 --- /dev/null +++ b/lib/clang/8.0.0/include/avx512vlvbmi2intrin.h @@ -0,0 +1,751 @@ +/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLVBMI2INTRIN_H +#define __AVX512VLVBMI2INTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, + (__v8hi) _mm_setzero_si128(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, + (__v16qi) _mm_setzero_si128(), + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS128 +_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) +{ + __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS128 +_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) +{ + __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, + (__v8hi) _mm_setzero_si128(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, + (__v16qi) _mm_setzero_si128(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, + (__v8hi) _mm_setzero_si128(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, + (__v16qi) _mm_setzero_si128(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, + (__v16hi) 
__S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, + (__v16hi) _mm256_setzero_si256(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, + (__v32qi) _mm256_setzero_si256(), + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS256 +_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) +{ + __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS256 +_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) +{ + __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, + (__v16hi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, + (__v16hi) _mm256_setzero_si256(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, + (__v32qi) _mm256_setzero_si256(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, + (__v16hi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, + (__v16hi) _mm256_setzero_si256(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, + (__v32qi) _mm256_setzero_si256(), + __U); +} + +#define _mm256_shldi_epi64(A, B, I) \ + (__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(I)) + +#define _mm256_mask_shldi_epi64(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ + (__v4di)(__m256i)(S)) + +#define _mm256_maskz_shldi_epi64(U, A, B, I) \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ + (__v4di)_mm256_setzero_si256()) + +#define _mm_shldi_epi64(A, B, I) \ + (__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ + 
(__v2di)(__m128i)(B), (int)(I)) + +#define _mm_mask_shldi_epi64(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shldi_epi64((A), (B), (I)), \ + (__v2di)(__m128i)(S)) + +#define _mm_maskz_shldi_epi64(U, A, B, I) \ + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shldi_epi64((A), (B), (I)), \ + (__v2di)_mm_setzero_si128()) + +#define _mm256_shldi_epi32(A, B, I) \ + (__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(I)) + +#define _mm256_mask_shldi_epi32(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ + (__v8si)(__m256i)(S)) + +#define _mm256_maskz_shldi_epi32(U, A, B, I) \ + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ + (__v8si)_mm256_setzero_si256()) + +#define _mm_shldi_epi32(A, B, I) \ + (__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(I)) + +#define _mm_mask_shldi_epi32(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shldi_epi32((A), (B), (I)), \ + (__v4si)(__m128i)(S)) + +#define _mm_maskz_shldi_epi32(U, A, B, I) \ + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shldi_epi32((A), (B), (I)), \ + (__v4si)_mm_setzero_si128()) + +#define _mm256_shldi_epi16(A, B, I) \ + (__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ + (__v16hi)(__m256i)(B), (int)(I)) + +#define _mm256_mask_shldi_epi16(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ + (__v16hi)(__m256i)(S)) + +#define _mm256_maskz_shldi_epi16(U, A, B, I) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ + (__v16hi)_mm256_setzero_si256()) + +#define _mm_shldi_epi16(A, B, I) \ + (__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (int)(I)) + +#define _mm_mask_shldi_epi16(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ + (__v8hi)(__m128i)(S)) + +#define _mm_maskz_shldi_epi16(U, A, B, I) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ + (__v8hi)_mm_setzero_si128()) + +#define _mm256_shrdi_epi64(A, B, I) \ + (__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(I)) + +#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ + (__v4di)(__m256i)(S)) + +#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ + (__v4di)_mm256_setzero_si256()) + +#define _mm_shrdi_epi64(A, B, I) \ + (__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(I)) + +#define _mm_mask_shrdi_epi64(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ + (__v2di)(__m128i)(S)) + +#define _mm_maskz_shrdi_epi64(U, A, B, I) \ + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ + (__v2di)_mm_setzero_si128()) + +#define _mm256_shrdi_epi32(A, B, I) \ + (__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(I)) + +#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ + 
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ + (__v8si)(__m256i)(S)) + +#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ + (__v8si)_mm256_setzero_si256()) + +#define _mm_shrdi_epi32(A, B, I) \ + (__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(I)) + +#define _mm_mask_shrdi_epi32(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ + (__v4si)(__m128i)(S)) + +#define _mm_maskz_shrdi_epi32(U, A, B, I) \ + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ + (__v4si)_mm_setzero_si128()) + +#define _mm256_shrdi_epi16(A, B, I) \ + (__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \ + (__v16hi)(__m256i)(B), (int)(I)) + +#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ + (__v16hi)(__m256i)(S)) + +#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ + (__v16hi)_mm256_setzero_si256()) + +#define _mm_shrdi_epi16(A, B, I) \ + (__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (int)(I)) + +#define _mm_mask_shrdi_epi16(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ + (__v8hi)(__m128i)(S)) + +#define _mm_maskz_shrdi_epi16(U, A, B, I) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ + (__v8hi)_mm_setzero_si128()) + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, 
__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_shrdv_epi64(__mmask8 __U, 
__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif diff --git a/lib/clang/8.0.0/include/avx512vlvnniintrin.h b/lib/clang/8.0.0/include/avx512vlvnniintrin.h new file mode 100644 index 0000000..6238226 --- /dev/null +++ b/lib/clang/8.0.0/include/avx512vlvnniintrin.h @@ -0,0 +1,223 @@ 
+/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLVNNIINTRIN_H +#define __AVX512VLVNNIINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, + (__v8si)__B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), + (__v8si)__S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, + (__v8si)__B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), + (__v8si)__S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, + (__v8si)__B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + 
return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), + (__v8si)__S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, + (__v8si)__B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), + (__v8si)__S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, + (__v4si)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpbusd_epi32(__S, __A, __B), + (__v4si)__S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpbusd_epi32(__S, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, + (__v4si)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpbusds_epi32(__S, __A, __B), + (__v4si)__S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpbusds_epi32(__S, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, + (__v4si)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpwssd_epi32(__S, __A, __B), + (__v4si)__S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpwssd_epi32(__S, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, + (__v4si)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + 
return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpwssds_epi32(__S, __A, __B), + (__v4si)__S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpwssds_epi32(__S, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif diff --git a/lib/clang/7.0.0/include/avx512vnniintrin.h b/lib/clang/8.0.0/include/avx512vnniintrin.h similarity index 58% rename from lib/clang/7.0.0/include/avx512vnniintrin.h rename to lib/clang/8.0.0/include/avx512vnniintrin.h index 0c6badd..620ef5a 100644 --- a/lib/clang/7.0.0/include/avx512vnniintrin.h +++ b/lib/clang/8.0.0/include/avx512vnniintrin.h @@ -29,118 +29,101 @@ #define __AVX512VNNIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512))) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A, + (__v16si)__B); +} + static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), + (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) +_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A, + (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), + (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) +_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return 
(__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A, + (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), + (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) +_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A, + (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), + (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) -{ - return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), + (__v16si)_mm512_setzero_si512()); } - #undef __DEFAULT_FN_ATTRS #endif diff --git a/lib/clang/7.0.0/include/avx512vpopcntdqintrin.h b/lib/clang/8.0.0/include/avx512vpopcntdqintrin.h similarity index 93% rename from lib/clang/7.0.0/include/avx512vpopcntdqintrin.h rename to lib/clang/8.0.0/include/avx512vpopcntdqintrin.h index 34ab849..c99f594 100644 --- a/lib/clang/7.0.0/include/avx512vpopcntdqintrin.h +++ b/lib/clang/8.0.0/include/avx512vpopcntdqintrin.h @@ -1,5 +1,4 @@ -/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics - *------------------=== +/*===----- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics-------------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -32,8 +31,7 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \ - "q"))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A); diff --git a/lib/clang/7.0.0/include/avx512vpopcntdqvlintrin.h b/lib/clang/8.0.0/include/avx512vpopcntdqvlintrin.h similarity index 72% rename from lib/clang/7.0.0/include/avx512vpopcntdqvlintrin.h rename to lib/clang/8.0.0/include/avx512vpopcntdqvlintrin.h index c2058a8..681a75f 100644 --- a/lib/clang/7.0.0/include/avx512vpopcntdqvlintrin.h +++ b/lib/clang/8.0.0/include/avx512vpopcntdqvlintrin.h @@ -1,5 +1,4 @@ -/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics - *------------------=== +/*===---- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics -------------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -31,69 +30,76 @@ #define __AVX512VPOPCNTDQVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_popcnt_epi64(__m128i __A) { return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128( (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_popcnt_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi64(__m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_popcnt_epi64(__m256i __A) { return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256( (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi32(__m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_popcnt_epi32(__m256i __A) { return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif diff --git a/lib/clang/7.0.0/include/avxintrin.h b/lib/clang/8.0.0/include/avxintrin.h similarity index 87% rename from lib/clang/7.0.0/include/avxintrin.h rename to lib/clang/8.0.0/include/avxintrin.h index 27dc644..cb15396 100644 --- a/lib/clang/7.0.0/include/avxintrin.h +++ b/lib/clang/8.0.0/include/avxintrin.h @@ -50,10 +50,11 @@ typedef double __m256d __attribute__((__vector_size__(32))); typedef long long __m256i __attribute__((__vector_size__(32))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128))) /* Arithmetic */ -/// \brief Adds two 256-bit vectors of [4 x double]. +/// Adds two 256-bit vectors of [4 x double]. /// /// \headerfile /// @@ -71,7 +72,7 @@ _mm256_add_pd(__m256d __a, __m256d __b) return (__m256d)((__v4df)__a+(__v4df)__b); } -/// \brief Adds two 256-bit vectors of [8 x float]. +/// Adds two 256-bit vectors of [8 x float]. /// /// \headerfile /// @@ -89,7 +90,7 @@ _mm256_add_ps(__m256 __a, __m256 __b) return (__m256)((__v8sf)__a+(__v8sf)__b); } -/// \brief Subtracts two 256-bit vectors of [4 x double]. +/// Subtracts two 256-bit vectors of [4 x double]. /// /// \headerfile /// @@ -107,7 +108,7 @@ _mm256_sub_pd(__m256d __a, __m256d __b) return (__m256d)((__v4df)__a-(__v4df)__b); } -/// \brief Subtracts two 256-bit vectors of [8 x float]. +/// Subtracts two 256-bit vectors of [8 x float]. /// /// \headerfile /// @@ -125,7 +126,7 @@ _mm256_sub_ps(__m256 __a, __m256 __b) return (__m256)((__v8sf)__a-(__v8sf)__b); } -/// \brief Adds the even-indexed values and subtracts the odd-indexed values of +/// Adds the even-indexed values and subtracts the odd-indexed values of /// two 256-bit vectors of [4 x double]. /// /// \headerfile @@ -144,7 +145,7 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } -/// \brief Adds the even-indexed values and subtracts the odd-indexed values of +/// Adds the even-indexed values and subtracts the odd-indexed values of /// two 256-bit vectors of [8 x float]. 
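Both the VNNI hunk earlier and this VPOPCNTDQ/VL hunk replace dedicated `_mask`/`_maskz` builtins with one composable pattern: evaluate the unmasked builtin, then merge per lane through a `__builtin_ia32_select*` (the new `__min_vector_width__` attribute just records the register width each function needs). Below is a minimal sketch of that select pattern next to the intrinsic form; the sample values, compile flags (`-mavx512vpopcntdq -mavx512vl`), and the `*_ref` helper names are assumptions for illustration, not part of the headers.

```c
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

static int32_t popcnt32(uint32_t x) {      /* hypothetical reference lane op */
    int32_t n = 0;
    while (x) { n += x & 1; x >>= 1; }
    return n;
}

/* Sketch of zero-masking as a select: compute every lane unmasked, then
 * keep the result where the mask bit is 1, else 0 (a merge-masking
 * variant would keep the passthrough operand instead of 0). */
static void maskz_popcnt_epi32_ref(int32_t dst[4], uint8_t k, const uint32_t a[4]) {
    for (int i = 0; i < 4; ++i)
        dst[i] = ((k >> i) & 1) ? popcnt32(a[i]) : 0;
}

int main(void) {
    __m128i v = _mm_set_epi32(0xFF, 0x0F, 0x03, 0x01); /* lanes 0..3 = 1,3,15,255 */
    int out[4];
    _mm_storeu_si128((__m128i *)out, _mm_maskz_popcnt_epi32(0x5, v));
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 1 0 4 0 */
    return 0;
}
```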
/// /// \headerfile @@ -163,7 +164,7 @@ _mm256_addsub_ps(__m256 __a, __m256 __b) return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } -/// \brief Divides two 256-bit vectors of [4 x double]. +/// Divides two 256-bit vectors of [4 x double]. /// /// \headerfile /// @@ -181,7 +182,7 @@ _mm256_div_pd(__m256d __a, __m256d __b) return (__m256d)((__v4df)__a/(__v4df)__b); } -/// \brief Divides two 256-bit vectors of [8 x float]. +/// Divides two 256-bit vectors of [8 x float]. /// /// \headerfile /// @@ -199,7 +200,7 @@ _mm256_div_ps(__m256 __a, __m256 __b) return (__m256)((__v8sf)__a/(__v8sf)__b); } -/// \brief Compares two 256-bit vectors of [4 x double] and returns the greater +/// Compares two 256-bit vectors of [4 x double] and returns the greater /// of each pair of values. /// /// \headerfile @@ -218,7 +219,7 @@ _mm256_max_pd(__m256d __a, __m256d __b) return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); } -/// \brief Compares two 256-bit vectors of [8 x float] and returns the greater +/// Compares two 256-bit vectors of [8 x float] and returns the greater /// of each pair of values. /// /// \headerfile @@ -237,7 +238,7 @@ _mm256_max_ps(__m256 __a, __m256 __b) return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); } -/// \brief Compares two 256-bit vectors of [4 x double] and returns the lesser +/// Compares two 256-bit vectors of [4 x double] and returns the lesser /// of each pair of values. /// /// \headerfile @@ -256,7 +257,7 @@ _mm256_min_pd(__m256d __a, __m256d __b) return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); } -/// \brief Compares two 256-bit vectors of [8 x float] and returns the lesser +/// Compares two 256-bit vectors of [8 x float] and returns the lesser /// of each pair of values. /// /// \headerfile @@ -275,7 +276,7 @@ _mm256_min_ps(__m256 __a, __m256 __b) return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); } -/// \brief Multiplies two 256-bit vectors of [4 x double]. +/// Multiplies two 256-bit vectors of [4 x double]. /// /// \headerfile /// @@ -293,7 +294,7 @@ _mm256_mul_pd(__m256d __a, __m256d __b) return (__m256d)((__v4df)__a * (__v4df)__b); } -/// \brief Multiplies two 256-bit vectors of [8 x float]. +/// Multiplies two 256-bit vectors of [8 x float]. /// /// \headerfile /// @@ -311,7 +312,7 @@ _mm256_mul_ps(__m256 __a, __m256 __b) return (__m256)((__v8sf)__a * (__v8sf)__b); } -/// \brief Calculates the square roots of the values in a 256-bit vector of +/// Calculates the square roots of the values in a 256-bit vector of /// [4 x double]. /// /// \headerfile @@ -328,7 +329,7 @@ _mm256_sqrt_pd(__m256d __a) return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); } -/// \brief Calculates the square roots of the values in a 256-bit vector of +/// Calculates the square roots of the values in a 256-bit vector of /// [8 x float]. /// /// \headerfile @@ -345,7 +346,7 @@ _mm256_sqrt_ps(__m256 __a) return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); } -/// \brief Calculates the reciprocal square roots of the values in a 256-bit +/// Calculates the reciprocal square roots of the values in a 256-bit /// vector of [8 x float]. /// /// \headerfile @@ -362,7 +363,7 @@ _mm256_rsqrt_ps(__m256 __a) return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a); } -/// \brief Calculates the reciprocals of the values in a 256-bit vector of +/// Calculates the reciprocals of the values in a 256-bit vector of /// [8 x float]. 
/// /// \headerfile @@ -379,7 +380,7 @@ _mm256_rcp_ps(__m256 __a) return (__m256)__builtin_ia32_rcpps256((__v8sf)__a); } -/// \brief Rounds the values in a 256-bit vector of [4 x double] as specified +/// Rounds the values in a 256-bit vector of [4 x double] as specified /// by the byte operand. The source values are rounded to integer values and /// returned as 64-bit double-precision floating-point values. /// @@ -408,10 +409,10 @@ _mm256_rcp_ps(__m256 __a) /// 10: Upward (toward positive infinity). \n /// 11: Truncated. /// \returns A 256-bit vector of [4 x double] containing the rounded values. -#define _mm256_round_pd(V, M) __extension__ ({ \ - (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); }) +#define _mm256_round_pd(V, M) \ + (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)) -/// \brief Rounds the values stored in a 256-bit vector of [8 x float] as +/// Rounds the values stored in a 256-bit vector of [8 x float] as /// specified by the byte operand. The source values are rounded to integer /// values and returned as floating-point values. /// @@ -440,10 +441,10 @@ _mm256_rcp_ps(__m256 __a) /// 10: Upward (toward positive infinity). \n /// 11: Truncated. /// \returns A 256-bit vector of [8 x float] containing the rounded values. -#define _mm256_round_ps(V, M) __extension__ ({ \ - (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); }) +#define _mm256_round_ps(V, M) \ + (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)) -/// \brief Rounds up the values stored in a 256-bit vector of [4 x double]. The +/// Rounds up the values stored in a 256-bit vector of [4 x double]. The /// source values are rounded up to integer values and returned as 64-bit /// double-precision floating-point values. /// @@ -460,7 +461,7 @@ _mm256_rcp_ps(__m256 __a) /// \returns A 256-bit vector of [4 x double] containing the rounded up values. #define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) -/// \brief Rounds down the values stored in a 256-bit vector of [4 x double]. +/// Rounds down the values stored in a 256-bit vector of [4 x double]. /// The source values are rounded down to integer values and returned as /// 64-bit double-precision floating-point values. /// @@ -478,7 +479,7 @@ _mm256_rcp_ps(__m256 __a) /// values. #define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) -/// \brief Rounds up the values stored in a 256-bit vector of [8 x float]. The +/// Rounds up the values stored in a 256-bit vector of [8 x float]. The /// source values are rounded up to integer values and returned as /// floating-point values. /// @@ -495,7 +496,7 @@ _mm256_rcp_ps(__m256 __a) /// \returns A 256-bit vector of [8 x float] containing the rounded up values. #define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) -/// \brief Rounds down the values stored in a 256-bit vector of [8 x float]. The +/// Rounds down the values stored in a 256-bit vector of [8 x float]. The /// source values are rounded down to integer values and returned as /// floating-point values. /// @@ -513,7 +514,7 @@ _mm256_rcp_ps(__m256 __a) #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) /* Logical */ -/// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double]. +/// Performs a bitwise AND of two 256-bit vectors of [4 x double]. /// /// \headerfile /// @@ -531,7 +532,7 @@ _mm256_and_pd(__m256d __a, __m256d __b) return (__m256d)((__v4du)__a & (__v4du)__b); } -/// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float]. 
+/// Performs a bitwise AND of two 256-bit vectors of [8 x float]. /// /// \headerfile /// @@ -549,7 +550,7 @@ _mm256_and_ps(__m256 __a, __m256 __b) return (__m256)((__v8su)__a & (__v8su)__b); } -/// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double], using +/// Performs a bitwise AND of two 256-bit vectors of [4 x double], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile @@ -570,7 +571,7 @@ _mm256_andnot_pd(__m256d __a, __m256d __b) return (__m256d)(~(__v4du)__a & (__v4du)__b); } -/// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float], using +/// Performs a bitwise AND of two 256-bit vectors of [8 x float], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile @@ -591,7 +592,7 @@ _mm256_andnot_ps(__m256 __a, __m256 __b) return (__m256)(~(__v8su)__a & (__v8su)__b); } -/// \brief Performs a bitwise OR of two 256-bit vectors of [4 x double]. +/// Performs a bitwise OR of two 256-bit vectors of [4 x double]. /// /// \headerfile /// @@ -609,7 +610,7 @@ _mm256_or_pd(__m256d __a, __m256d __b) return (__m256d)((__v4du)__a | (__v4du)__b); } -/// \brief Performs a bitwise OR of two 256-bit vectors of [8 x float]. +/// Performs a bitwise OR of two 256-bit vectors of [8 x float]. /// /// \headerfile /// @@ -627,7 +628,7 @@ _mm256_or_ps(__m256 __a, __m256 __b) return (__m256)((__v8su)__a | (__v8su)__b); } -/// \brief Performs a bitwise XOR of two 256-bit vectors of [4 x double]. +/// Performs a bitwise XOR of two 256-bit vectors of [4 x double]. /// /// \headerfile /// @@ -645,7 +646,7 @@ _mm256_xor_pd(__m256d __a, __m256d __b) return (__m256d)((__v4du)__a ^ (__v4du)__b); } -/// \brief Performs a bitwise XOR of two 256-bit vectors of [8 x float]. +/// Performs a bitwise XOR of two 256-bit vectors of [8 x float]. /// /// \headerfile /// @@ -664,7 +665,7 @@ _mm256_xor_ps(__m256 __a, __m256 __b) } /* Horizontal arithmetic */ -/// \brief Horizontally adds the adjacent pairs of values contained in two +/// Horizontally adds the adjacent pairs of values contained in two /// 256-bit vectors of [4 x double]. /// /// \headerfile @@ -687,7 +688,7 @@ _mm256_hadd_pd(__m256d __a, __m256d __b) return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } -/// \brief Horizontally adds the adjacent pairs of values contained in two +/// Horizontally adds the adjacent pairs of values contained in two /// 256-bit vectors of [8 x float]. /// /// \headerfile @@ -710,7 +711,7 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in two +/// Horizontally subtracts the adjacent pairs of values contained in two /// 256-bit vectors of [4 x double]. /// /// \headerfile @@ -733,7 +734,7 @@ _mm256_hsub_pd(__m256d __a, __m256d __b) return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in two +/// Horizontally subtracts the adjacent pairs of values contained in two /// 256-bit vectors of [8 x float]. /// /// \headerfile @@ -757,7 +758,7 @@ _mm256_hsub_ps(__m256 __a, __m256 __b) } /* Vector permutations */ -/// \brief Copies the values in a 128-bit vector of [2 x double] as specified +/// Copies the values in a 128-bit vector of [2 x double] as specified /// by the 128-bit integer vector operand. 
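The horizontal-add/subtract intrinsics in the hunk above pair adjacent elements within each 128-bit half rather than across the full 256-bit vector, which is easy to misread. A usage sketch under assumed sample values (compile with `-mavx`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);     /* lanes a0..a3 = 1,2,3,4 */
    __m256d b = _mm256_set_pd(40.0, 30.0, 20.0, 10.0); /* lanes b0..b3 = 10,20,30,40 */
    double out[4];
    /* per 128-bit half: {a0+a1, b0+b1 | a2+a3, b2+b3} */
    _mm256_storeu_pd(out, _mm256_hadd_pd(a, b));
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 3 30 7 70 */
    return 0;
}
```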
/// /// \headerfile @@ -780,13 +781,13 @@ _mm256_hsub_ps(__m256 __a, __m256 __b) /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); } -/// \brief Copies the values in a 256-bit vector of [4 x double] as specified +/// Copies the values in a 256-bit vector of [4 x double] as specified /// by the 256-bit integer vector operand. /// /// \headerfile @@ -825,7 +826,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c) return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); } -/// \brief Copies the values stored in a 128-bit vector of [4 x float] as +/// Copies the values stored in a 128-bit vector of [4 x float] as /// specified by the 128-bit integer vector operand. /// \headerfile /// @@ -873,13 +874,13 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c) /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); } -/// \brief Copies the values stored in a 256-bit vector of [8 x float] as +/// Copies the values stored in a 256-bit vector of [8 x float] as /// specified by the 256-bit integer vector operand. /// /// \headerfile @@ -970,7 +971,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); } -/// \brief Copies the values in a 128-bit vector of [2 x double] as specified +/// Copies the values in a 128-bit vector of [2 x double] as specified /// by the immediate integer operand. /// /// \headerfile @@ -997,12 +998,10 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. -#define _mm_permute_pd(A, C) __extension__ ({ \ - (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \ - (__v2df)_mm_undefined_pd(), \ - ((C) >> 0) & 0x1, ((C) >> 1) & 0x1); }) +#define _mm_permute_pd(A, C) \ + (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C)) -/// \brief Copies the values in a 256-bit vector of [4 x double] as specified by +/// Copies the values in a 256-bit vector of [4 x double] as specified by /// the immediate integer operand. /// /// \headerfile @@ -1039,15 +1038,10 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. 
-#define _mm256_permute_pd(A, C) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \ - (__v4df)_mm256_undefined_pd(), \ - 0 + (((C) >> 0) & 0x1), \ - 0 + (((C) >> 1) & 0x1), \ - 2 + (((C) >> 2) & 0x1), \ - 2 + (((C) >> 3) & 0x1)); }) - -/// \brief Copies the values in a 128-bit vector of [4 x float] as specified by +#define _mm256_permute_pd(A, C) \ + (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C)) + +/// Copies the values in a 128-bit vector of [4 x float] as specified by /// the immediate integer operand. /// /// \headerfile @@ -1100,13 +1094,10 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. -#define _mm_permute_ps(A, C) __extension__ ({ \ - (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \ - (__v4sf)_mm_undefined_ps(), \ - ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ - ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) +#define _mm_permute_ps(A, C) \ + (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C)) -/// \brief Copies the values in a 256-bit vector of [8 x float] as specified by +/// Copies the values in a 256-bit vector of [8 x float] as specified by /// the immediate integer operand. /// /// \headerfile @@ -1195,19 +1186,10 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. -#define _mm256_permute_ps(A, C) __extension__ ({ \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_undefined_ps(), \ - 0 + (((C) >> 0) & 0x3), \ - 0 + (((C) >> 2) & 0x3), \ - 0 + (((C) >> 4) & 0x3), \ - 0 + (((C) >> 6) & 0x3), \ - 4 + (((C) >> 0) & 0x3), \ - 4 + (((C) >> 2) & 0x3), \ - 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3)); }) - -/// \brief Permutes 128-bit data values stored in two 256-bit vectors of +#define _mm256_permute_ps(A, C) \ + (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C)) + +/// Permutes 128-bit data values stored in two 256-bit vectors of /// [4 x double], as specified by the immediate integer operand. /// /// \headerfile @@ -1244,11 +1226,11 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. -#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \ +#define _mm256_permute2f128_pd(V1, V2, M) \ (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \ - (__v4df)(__m256d)(V2), (M)); }) + (__v4df)(__m256d)(V2), (int)(M)) -/// \brief Permutes 128-bit data values stored in two 256-bit vectors of +/// Permutes 128-bit data values stored in two 256-bit vectors of /// [8 x float], as specified by the immediate integer operand. /// /// \headerfile @@ -1285,11 +1267,11 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. 
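After this change `_mm_permute_ps` and friends hand the control byte directly to a `vpermil*` builtin instead of expanding a GNU statement expression over `__builtin_shufflevector`, so the control must be an integer constant expression. A usage sketch with assumed sample data (compile with `-mavx`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 v = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); /* lanes 0..3 = 0,1,2,3 */
    __m128 r = _mm_permute_ps(v, 0x1B);            /* 0b00011011 reverses the lanes */
    float out[4];
    _mm_storeu_ps(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 3 2 1 0 */
    return 0;
}
```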
-#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \ +#define _mm256_permute2f128_ps(V1, V2, M) \ (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \ - (__v8sf)(__m256)(V2), (M)); }) + (__v8sf)(__m256)(V2), (int)(M)) -/// \brief Permutes 128-bit data values stored in two 256-bit integer vectors, +/// Permutes 128-bit data values stored in two 256-bit integer vectors, /// as specified by the immediate integer operand. /// /// \headerfile @@ -1325,12 +1307,12 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit integer vector containing the copied values. -#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \ +#define _mm256_permute2f128_si256(V1, V2, M) \ (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \ - (__v8si)(__m256i)(V2), (M)); }) + (__v8si)(__m256i)(V2), (int)(M)) /* Vector Blend */ -/// \brief Merges 64-bit double-precision data values stored in either of the +/// Merges 64-bit double-precision data values stored in either of the /// two 256-bit vectors of [4 x double], as specified by the immediate /// integer operand. /// @@ -1354,15 +1336,11 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// destination. When a mask bit is 1, the corresponding 64-bit element in /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. -#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \ - (__v4df)(__m256d)(V2), \ - (((M) & 0x01) ? 4 : 0), \ - (((M) & 0x02) ? 5 : 1), \ - (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 7 : 3)); }) - -/// \brief Merges 32-bit single-precision data values stored in either of the +#define _mm256_blend_pd(V1, V2, M) \ + (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \ + (__v4df)(__m256d)(V2), (int)(M)) + +/// Merges 32-bit single-precision data values stored in either of the /// two 256-bit vectors of [8 x float], as specified by the immediate /// integer operand. /// @@ -1386,19 +1364,11 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// destination. When a mask bit is 1, the corresponding 32-bit element in /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. -#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \ - (__v8sf)(__m256)(V2), \ - (((M) & 0x01) ? 8 : 0), \ - (((M) & 0x02) ? 9 : 1), \ - (((M) & 0x04) ? 10 : 2), \ - (((M) & 0x08) ? 11 : 3), \ - (((M) & 0x10) ? 12 : 4), \ - (((M) & 0x20) ? 13 : 5), \ - (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 15 : 7)); }) - -/// \brief Merges 64-bit double-precision data values stored in either of the +#define _mm256_blend_ps(V1, V2, M) \ + (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \ + (__v8sf)(__m256)(V2), (int)(M)) + +/// Merges 64-bit double-precision data values stored in either of the /// two 256-bit vectors of [4 x double], as specified by the 256-bit vector /// operand. /// @@ -1426,7 +1396,7 @@ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) (__v4df)__a, (__v4df)__b, (__v4df)__c); } -/// \brief Merges 32-bit single-precision data values stored in either of the +/// Merges 32-bit single-precision data values stored in either of the /// two 256-bit vectors of [8 x float], as specified by the 256-bit vector /// operand. 
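The blend macros get the same treatment: a single immediate builtin (`__builtin_ia32_blendpd256`/`blendps256`) instead of an unrolled `__builtin_shufflevector`. Each mask bit picks the lane source, 1 meaning the second operand. A usage sketch with assumed values (compile with `-mavx`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_set1_pd(1.0);
    __m256d b = _mm256_set1_pd(2.0);
    __m256d r = _mm256_blend_pd(a, b, 0x5); /* mask 0b0101: lanes 0,2 from b */
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 2 1 2 1 */
    return 0;
}
```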
/// @@ -1455,7 +1425,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) } /* Vector Dot Product */ -/// \brief Computes two dot products in parallel, using the lower and upper +/// Computes two dot products in parallel, using the lower and upper /// halves of two [8 x float] vectors as input to the two computations, and /// returning the two dot products in the lower and upper halves of the /// [8 x float] result. @@ -1492,12 +1462,12 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// is set to zero. The bitmask is applied in the same way to each of the /// two parallel dot product computations. /// \returns A 256-bit vector of [8 x float] containing the two dot products. -#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \ +#define _mm256_dp_ps(V1, V2, M) \ (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \ - (__v8sf)(__m256)(V2), (M)); }) + (__v8sf)(__m256)(V2), (M)) /* Vector shuffle */ -/// \brief Selects 8 float values from the 256-bit operands of [8 x float], as +/// Selects 8 float values from the 256-bit operands of [8 x float], as /// specified by the immediate value operand. /// /// The four selected elements in each operand are copied to the destination @@ -1546,19 +1516,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 10: Bits [95:64] and [223:192] are copied from the selected operand. \n /// 11: Bits [127:96] and [255:224] are copied from the selected operand. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. -#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \ - (__v8sf)(__m256)(b), \ - 0 + (((mask) >> 0) & 0x3), \ - 0 + (((mask) >> 2) & 0x3), \ - 8 + (((mask) >> 4) & 0x3), \ - 8 + (((mask) >> 6) & 0x3), \ - 4 + (((mask) >> 0) & 0x3), \ - 4 + (((mask) >> 2) & 0x3), \ - 12 + (((mask) >> 4) & 0x3), \ - 12 + (((mask) >> 6) & 0x3)); }) - -/// \brief Selects four double-precision values from the 256-bit operands of +#define _mm256_shuffle_ps(a, b, mask) \ + (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \ + (__v8sf)(__m256)(b), (int)(mask)) + +/// Selects four double-precision values from the 256-bit operands of /// [4 x double], as specified by the immediate value operand. /// /// The selected elements from the first 256-bit operand are copied to bits @@ -1600,13 +1562,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// Bit [3]=1: Bits [255:192] are copied from \a b to bits [255:192] of the /// destination. /// \returns A 256-bit vector of [4 x double] containing the shuffled values. -#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \ - (__v4df)(__m256d)(b), \ - 0 + (((mask) >> 0) & 0x1), \ - 4 + (((mask) >> 1) & 0x1), \ - 2 + (((mask) >> 2) & 0x1), \ - 6 + (((mask) >> 3) & 0x1)); }) +#define _mm256_shuffle_pd(a, b, mask) \ + (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \ + (__v4df)(__m256d)(b), (int)(mask)) /* Compare */ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ @@ -1642,7 +1600,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ -/// \brief Compares each of the corresponding double-precision values of two +/// Compares each of the corresponding double-precision values of two /// 128-bit vectors of [2 x double], using the operation specified by the /// immediate integer operand. 
/// @@ -1698,11 +1656,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. -#define _mm_cmp_pd(a, b, c) __extension__ ({ \ +#define _mm_cmp_pd(a, b, c) \ (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \ - (__v2df)(__m128d)(b), (c)); }) + (__v2df)(__m128d)(b), (c)) -/// \brief Compares each of the corresponding values of two 128-bit vectors of +/// Compares each of the corresponding values of two 128-bit vectors of /// [4 x float], using the operation specified by the immediate integer /// operand. /// @@ -1758,11 +1716,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. -#define _mm_cmp_ps(a, b, c) __extension__ ({ \ +#define _mm_cmp_ps(a, b, c) \ (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \ - (__v4sf)(__m128)(b), (c)); }) + (__v4sf)(__m128)(b), (c)) -/// \brief Compares each of the corresponding double-precision values of two +/// Compares each of the corresponding double-precision values of two /// 256-bit vectors of [4 x double], using the operation specified by the /// immediate integer operand. /// @@ -1818,11 +1776,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 256-bit vector of [4 x double] containing the comparison results. -#define _mm256_cmp_pd(a, b, c) __extension__ ({ \ +#define _mm256_cmp_pd(a, b, c) \ (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \ - (__v4df)(__m256d)(b), (c)); }) + (__v4df)(__m256d)(b), (c)) -/// \brief Compares each of the corresponding values of two 256-bit vectors of +/// Compares each of the corresponding values of two 256-bit vectors of /// [8 x float], using the operation specified by the immediate integer /// operand. /// @@ -1878,11 +1836,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 256-bit vector of [8 x float] containing the comparison results. -#define _mm256_cmp_ps(a, b, c) __extension__ ({ \ +#define _mm256_cmp_ps(a, b, c) \ (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \ - (__v8sf)(__m256)(b), (c)); }) + (__v8sf)(__m256)(b), (c)) -/// \brief Compares each of the corresponding scalar double-precision values of +/// Compares each of the corresponding scalar double-precision values of /// two 128-bit vectors of [2 x double], using the operation specified by the /// immediate integer operand. /// @@ -1937,11 +1895,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. -#define _mm_cmp_sd(a, b, c) __extension__ ({ \ +#define _mm_cmp_sd(a, b, c) \ (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \ - (__v2df)(__m128d)(b), (c)); }) + (__v2df)(__m128d)(b), (c)) -/// \brief Compares each of the corresponding scalar values of two 128-bit +/// Compares each of the corresponding scalar values of two 128-bit /// vectors of [4 x float], using the operation specified by the immediate /// integer operand. 
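The compare macros take one of the `_CMP_*` predicates defined just above and produce an all-ones/all-zeros lane mask, which pairs naturally with a movemask. A usage sketch with assumed sample values (compile with `-mavx`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0); /* lanes 0..3 = 1,2,3,4 */
    __m256d b = _mm256_set1_pd(2.5);
    __m256d m = _mm256_cmp_pd(a, b, _CMP_GT_OQ);   /* a > b, ordered, quiet */
    printf("mask = 0x%x\n", _mm256_movemask_pd(m)); /* lanes 2,3 pass -> 0xc */
    return 0;
}
```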
/// @@ -1996,11 +1954,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. -#define _mm_cmp_ss(a, b, c) __extension__ ({ \ +#define _mm_cmp_ss(a, b, c) \ (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \ - (__v4sf)(__m128)(b), (c)); }) + (__v4sf)(__m128)(b), (c)) -/// \brief Takes a [8 x i32] vector and returns the vector element value +/// Takes a [8 x i32] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile @@ -2015,14 +1973,10 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 32 bits of extended /// packed data. -static __inline int __DEFAULT_FN_ATTRS -_mm256_extract_epi32(__m256i __a, const int __imm) -{ - __v8si __b = (__v8si)__a; - return __b[__imm & 7]; -} +#define _mm256_extract_epi32(X, N) \ + (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N)) -/// \brief Takes a [16 x i16] vector and returns the vector element value +/// Takes a [16 x i16] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile @@ -2037,14 +1991,11 @@ _mm256_extract_epi32(__m256i __a, const int __imm) /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 16 bits of zero extended /// packed data. -static __inline int __DEFAULT_FN_ATTRS -_mm256_extract_epi16(__m256i __a, const int __imm) -{ - __v16hi __b = (__v16hi)__a; - return (unsigned short)__b[__imm & 15]; -} +#define _mm256_extract_epi16(X, N) \ + (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \ + (int)(N)) -/// \brief Takes a [32 x i8] vector and returns the vector element value +/// Takes a [32 x i8] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile @@ -2059,15 +2010,12 @@ _mm256_extract_epi16(__m256i __a, const int __imm) /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 8 bits of zero extended /// packed data. -static __inline int __DEFAULT_FN_ATTRS -_mm256_extract_epi8(__m256i __a, const int __imm) -{ - __v32qi __b = (__v32qi)__a; - return (unsigned char)__b[__imm & 31]; -} +#define _mm256_extract_epi8(X, N) \ + (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \ + (int)(N)) #ifdef __x86_64__ -/// \brief Takes a [4 x i64] vector and returns the vector element value +/// Takes a [4 x i64] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile @@ -2082,15 +2030,11 @@ _mm256_extract_epi8(__m256i __a, const int __imm) /// element is extracted and returned. /// \returns A 64-bit integer containing the extracted 64 bits of extended /// packed data. -static __inline long long __DEFAULT_FN_ATTRS -_mm256_extract_epi64(__m256i __a, const int __imm) -{ - __v4di __b = (__v4di)__a; - return __b[__imm & 3]; -} +#define _mm256_extract_epi64(X, N) \ + (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N)) #endif -/// \brief Takes a [8 x i32] vector and replaces the vector element value +/// Takes a [8 x i32] vector and replaces the vector element value /// indexed by the immediate constant operand by a new value. Returns the /// modified vector. 
/// @@ -2108,16 +2052,12 @@ _mm256_extract_epi64(__m256i __a, const int __imm) /// replaced. /// \returns A copy of vector \a __a, after replacing its element indexed by /// \a __imm with \a __b. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_insert_epi32(__m256i __a, int __b, int const __imm) -{ - __v8si __c = (__v8si)__a; - __c[__imm & 7] = __b; - return (__m256i)__c; -} +#define _mm256_insert_epi32(X, I, N) \ + (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \ + (int)(I), (int)(N)) -/// \brief Takes a [16 x i16] vector and replaces the vector element value +/// Takes a [16 x i16] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// @@ -2135,15 +2075,11 @@ _mm256_insert_epi32(__m256i __a, int __b, int const __imm) /// replaced. /// \returns A copy of vector \a __a, after replacing its element indexed by /// \a __imm with \a __b. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_insert_epi16(__m256i __a, int __b, int const __imm) -{ - __v16hi __c = (__v16hi)__a; - __c[__imm & 15] = __b; - return (__m256i)__c; -} +#define _mm256_insert_epi16(X, I, N) \ + (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \ + (int)(I), (int)(N)) -/// \brief Takes a [32 x i8] vector and replaces the vector element value +/// Takes a [32 x i8] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// @@ -2161,16 +2097,12 @@ _mm256_insert_epi16(__m256i __a, int __b, int const __imm) /// replaced. /// \returns A copy of vector \a __a, after replacing its element indexed by /// \a __imm with \a __b. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_insert_epi8(__m256i __a, int __b, int const __imm) -{ - __v32qi __c = (__v32qi)__a; - __c[__imm & 31] = __b; - return (__m256i)__c; -} +#define _mm256_insert_epi8(X, I, N) \ + (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \ + (int)(I), (int)(N)) #ifdef __x86_64__ -/// \brief Takes a [4 x i64] vector and replaces the vector element value +/// Takes a [4 x i64] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// @@ -2188,17 +2120,13 @@ _mm256_insert_epi8(__m256i __a, int __b, int const __imm) /// replaced. /// \returns A copy of vector \a __a, after replacing its element indexed by /// \a __imm with \a __b. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_insert_epi64(__m256i __a, long long __b, int const __imm) -{ - __v4di __c = (__v4di)__a; - __c[__imm & 3] = __b; - return (__m256i)__c; -} +#define _mm256_insert_epi64(X, I, N) \ + (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \ + (long long)(I), (int)(N)) #endif /* Conversion */ -/// \brief Converts a vector of [4 x i32] into a vector of [4 x double]. +/// Converts a vector of [4 x i32] into a vector of [4 x double]. /// /// \headerfile /// @@ -2213,7 +2141,7 @@ _mm256_cvtepi32_pd(__m128i __a) return (__m256d)__builtin_convertvector((__v4si)__a, __v4df); } -/// \brief Converts a vector of [8 x i32] into a vector of [8 x float]. +/// Converts a vector of [8 x i32] into a vector of [8 x float]. 
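Turning the extract/insert helpers into macros over `__builtin_ia32_vec_ext_*`/`vec_set_*` means the index now has to be a compile-time constant rather than being masked at runtime. A usage sketch with assumed sample data (compile with `-mavx`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i v = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); /* lane i holds i */
    int e = _mm256_extract_epi32(v, 5);        /* index must be a constant */
    __m256i w = _mm256_insert_epi32(v, 42, 5); /* replace lane 5 */
    printf("%d %d\n", e, _mm256_extract_epi32(w, 5)); /* 5 42 */
    return 0;
}
```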
/// /// \headerfile /// @@ -2225,10 +2153,10 @@ _mm256_cvtepi32_pd(__m128i __a) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps(__m256i __a) { - return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a); + return (__m256)__builtin_convertvector((__v8si)__a, __v8sf); } -/// \brief Converts a 256-bit vector of [4 x double] into a 128-bit vector of +/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of /// [4 x float]. /// /// \headerfile @@ -2244,7 +2172,7 @@ _mm256_cvtpd_ps(__m256d __a) return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); } -/// \brief Converts a vector of [8 x float] into a vector of [8 x i32]. +/// Converts a vector of [8 x float] into a vector of [8 x i32]. /// /// \headerfile /// @@ -2259,7 +2187,7 @@ _mm256_cvtps_epi32(__m256 __a) return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a); } -/// \brief Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 +/// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 /// x double]. /// /// \headerfile @@ -2275,7 +2203,7 @@ _mm256_cvtps_pd(__m128 __a) return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df); } -/// \brief Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 +/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 /// x i32], truncating the result by rounding towards zero when it is /// inexact. /// @@ -2292,7 +2220,7 @@ _mm256_cvttpd_epi32(__m256d __a) return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); } -/// \brief Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 +/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 /// x i32]. When a conversion is inexact, the value returned is rounded /// according to the rounding control bits in the MXCSR register. /// @@ -2309,7 +2237,7 @@ _mm256_cvtpd_epi32(__m256d __a) return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a); } -/// \brief Converts a vector of [8 x float] into a vector of [8 x i32], +/// Converts a vector of [8 x float] into a vector of [8 x i32], /// truncating the result by rounding towards zero when it is inexact. /// /// \headerfile @@ -2325,7 +2253,7 @@ _mm256_cvttps_epi32(__m256 __a) return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); } -/// \brief Returns the first element of the input vector of [4 x double]. +/// Returns the first element of the input vector of [4 x double]. /// /// \headerfile /// @@ -2341,7 +2269,7 @@ _mm256_cvtsd_f64(__m256d __a) return __a[0]; } -/// \brief Returns the first element of the input vector of [8 x i32]. +/// Returns the first element of the input vector of [8 x i32]. /// /// \headerfile /// @@ -2358,7 +2286,7 @@ _mm256_cvtsi256_si32(__m256i __a) return __b[0]; } -/// \brief Returns the first element of the input vector of [8 x float]. +/// Returns the first element of the input vector of [8 x float]. /// /// \headerfile /// @@ -2375,7 +2303,7 @@ _mm256_cvtss_f32(__m256 __a) } /* Vector replicate */ -/// \brief Moves and duplicates odd-indexed values from a 256-bit vector of +/// Moves and duplicates odd-indexed values from a 256-bit vector of /// [8 x float] to float values in a 256-bit vector of [8 x float]. 
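`_mm256_cvtepi32_ps` now goes through `__builtin_convertvector`, clang's generic lane-wise conversion, instead of a dedicated ia32 builtin. A minimal sketch of the same idea on clang's vector extensions; the typedef and function names here are assumptions mirroring the header's internal `__v8si`/`__v8sf` types:

```c
typedef int   v8si __attribute__((__vector_size__(32)));
typedef float v8sf __attribute__((__vector_size__(32)));

/* Lane-wise conversion: behaves like (float)a[i] for each of the 8 lanes. */
static v8sf cvt_epi32_ps_sketch(v8si a) {
    return __builtin_convertvector(a, v8sf);
}
```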
/// /// \headerfile @@ -2400,7 +2328,7 @@ _mm256_movehdup_ps(__m256 __a) return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7); } -/// \brief Moves and duplicates even-indexed values from a 256-bit vector of +/// Moves and duplicates even-indexed values from a 256-bit vector of /// [8 x float] to float values in a 256-bit vector of [8 x float]. /// /// \headerfile @@ -2425,7 +2353,7 @@ _mm256_moveldup_ps(__m256 __a) return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6); } -/// \brief Moves and duplicates double-precision floating point values from a +/// Moves and duplicates double-precision floating point values from a /// 256-bit vector of [4 x double] to double-precision values in a 256-bit /// vector of [4 x double]. /// @@ -2448,7 +2376,7 @@ _mm256_movedup_pd(__m256d __a) } /* Unpack and Interleave */ -/// \brief Unpacks the odd-indexed vector elements from two 256-bit vectors of +/// Unpacks the odd-indexed vector elements from two 256-bit vectors of /// [4 x double] and interleaves them into a 256-bit vector of [4 x double]. /// /// \headerfile @@ -2470,7 +2398,7 @@ _mm256_unpackhi_pd(__m256d __a, __m256d __b) return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2); } -/// \brief Unpacks the even-indexed vector elements from two 256-bit vectors of +/// Unpacks the even-indexed vector elements from two 256-bit vectors of /// [4 x double] and interleaves them into a 256-bit vector of [4 x double]. /// /// \headerfile @@ -2492,7 +2420,7 @@ _mm256_unpacklo_pd(__m256d __a, __m256d __b) return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2); } -/// \brief Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the +/// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the /// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit /// vector of [8 x float]. /// @@ -2519,7 +2447,7 @@ _mm256_unpackhi_ps(__m256 __a, __m256 __b) return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); } -/// \brief Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the +/// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the /// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit /// vector of [8 x float]. /// @@ -2547,7 +2475,7 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) } /* Bit Test */ -/// \brief Given two 128-bit floating-point vectors of [2 x double], perform an +/// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. @@ -2570,13 +2498,13 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the ZF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); } -/// \brief Given two 128-bit floating-point vectors of [2 x double], perform an +/// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. @@ -2599,13 +2527,13 @@ _mm_testz_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [2 x double]. 
/// \returns the CF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); } -/// \brief Given two 128-bit floating-point vectors of [2 x double], perform an +/// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. @@ -2629,13 +2557,13 @@ _mm_testc_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); } -/// \brief Given two 128-bit floating-point vectors of [4 x float], perform an +/// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. @@ -2658,13 +2586,13 @@ _mm_testnzc_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); } -/// \brief Given two 128-bit floating-point vectors of [4 x float], perform an +/// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. @@ -2687,13 +2615,13 @@ _mm_testz_ps(__m128 __a, __m128 __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); } -/// \brief Given two 128-bit floating-point vectors of [4 x float], perform an +/// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. @@ -2717,13 +2645,13 @@ _mm_testc_ps(__m128 __a, __m128 __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); } -/// \brief Given two 256-bit floating-point vectors of [4 x double], perform an +/// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. 
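The `vtest*` intrinsics above operate only on the sign bits of each lane: `_mm_testz_pd(a, b)` returns the ZF result, i.e. 1 when no lane of `a AND b` has its sign bit set. A usage sketch with assumed sample values (compile with `-mavx`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_pd(-1.0, 1.0); /* lane 1 negative */
    __m128d b = _mm_set_pd(-2.0, 2.0); /* lane 1 negative */
    printf("%d\n", _mm_testz_pd(a, b));                /* 0: lane 1 signs overlap */
    printf("%d\n", _mm_testz_pd(_mm_set1_pd(1.0), b)); /* 1: no common sign bits */
    return 0;
}
```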
@@ -2752,7 +2680,7 @@ _mm256_testz_pd(__m256d __a, __m256d __b) return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); } -/// \brief Given two 256-bit floating-point vectors of [4 x double], perform an +/// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. @@ -2781,7 +2709,7 @@ _mm256_testc_pd(__m256d __a, __m256d __b) return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); } -/// \brief Given two 256-bit floating-point vectors of [4 x double], perform an +/// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. @@ -2811,7 +2739,7 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b) return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); } -/// \brief Given two 256-bit floating-point vectors of [8 x float], perform an +/// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. @@ -2840,7 +2768,7 @@ _mm256_testz_ps(__m256 __a, __m256 __b) return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); } -/// \brief Given two 256-bit floating-point vectors of [8 x float], perform an +/// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. @@ -2869,7 +2797,7 @@ _mm256_testc_ps(__m256 __a, __m256 __b) return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); } -/// \brief Given two 256-bit floating-point vectors of [8 x float], perform an +/// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. @@ -2899,7 +2827,7 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b) return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); } -/// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison +/// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. /// /// The EFLAGS register is updated as follows: \n @@ -2925,7 +2853,7 @@ _mm256_testz_si256(__m256i __a, __m256i __b) return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); } -/// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison +/// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. /// /// The EFLAGS register is updated as follows: \n @@ -2951,7 +2879,7 @@ _mm256_testc_si256(__m256i __a, __m256i __b) return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); } -/// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison +/// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. 
/// /// The EFLAGS register is updated as follows: \n @@ -2979,7 +2907,7 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b) } /* Vector extract sign mask */ -/// \brief Extracts the sign bits of double-precision floating point elements +/// Extracts the sign bits of double-precision floating point elements /// in a 256-bit vector of [4 x double] and writes them to the lower order /// bits of the return value. /// @@ -2997,7 +2925,7 @@ _mm256_movemask_pd(__m256d __a) return __builtin_ia32_movmskpd256((__v4df)__a); } -/// \brief Extracts the sign bits of single-precision floating point elements +/// Extracts the sign bits of single-precision floating point elements /// in a 256-bit vector of [8 x float] and writes them to the lower order /// bits of the return value. /// @@ -3016,30 +2944,30 @@ _mm256_movemask_ps(__m256 __a) } /* Vector __zero */ -/// \brief Zeroes the contents of all XMM or YMM registers. +/// Zeroes the contents of all XMM or YMM registers. /// /// \headerfile /// /// This intrinsic corresponds to the VZEROALL instruction. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroall(void) { __builtin_ia32_vzeroall(); } -/// \brief Zeroes the upper 128 bits (bits 255:128) of all YMM registers. +/// Zeroes the upper 128 bits (bits 255:128) of all YMM registers. /// /// \headerfile /// /// This intrinsic corresponds to the VZEROUPPER instruction. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroupper(void) { __builtin_ia32_vzeroupper(); } /* Vector load with broadcast */ -/// \brief Loads a scalar single-precision floating point value from the +/// Loads a scalar single-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [4 x float] vector. /// @@ -3051,14 +2979,14 @@ _mm256_zeroupper(void) /// The single-precision floating point value to be broadcast. /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set /// equal to the broadcast value. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_broadcast_ss(float const *__a) { float __f = *__a; - return (__m128)(__v4sf){ __f, __f, __f, __f }; + return __extension__ (__m128)(__v4sf){ __f, __f, __f, __f }; } -/// \brief Loads a scalar double-precision floating point value from the +/// Loads a scalar double-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [4 x double] vector. /// @@ -3074,10 +3002,10 @@ static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_sd(double const *__a) { double __d = *__a; - return (__m256d)(__v4df){ __d, __d, __d, __d }; + return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d }; } -/// \brief Loads a scalar single-precision floating point value from the +/// Loads a scalar single-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [8 x float] vector. 
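Note that `_mm256_zeroall`/`_mm256_zeroupper` get a spelled-out attribute set without `__min_vector_width__(256)`, presumably so that merely clearing YMM state does not mark the caller as needing 256-bit vectors. A usage sketch of the broadcast plus the customary `vzeroupper` before returning to legacy SSE code; sample value assumed, compile with `-mavx`:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    float x = 1.5f;
    __m256 v = _mm256_broadcast_ss(&x); /* all 8 lanes = 1.5 */
    float out[8];
    _mm256_storeu_ps(out, v);
    printf("%g %g\n", out[0], out[7]);  /* 1.5 1.5 */
    _mm256_zeroupper(); /* clear upper YMM halves before legacy SSE code */
    return 0;
}
```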
/// @@ -3093,10 +3021,10 @@ static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ss(float const *__a) { float __f = *__a; - return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f }; + return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f }; } -/// \brief Loads the data from a 128-bit vector of [2 x double] from the +/// Loads the data from a 128-bit vector of [2 x double] from the /// specified address pointed to by \a __a and broadcasts it to 128-bit /// elements in a 256-bit vector of [4 x double]. /// @@ -3111,10 +3039,12 @@ _mm256_broadcast_ss(float const *__a) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd(__m128d const *__a) { - return (__m256d)__builtin_ia32_vbroadcastf128_pd256((__v2df const *)__a); + __m128d __b = _mm_loadu_pd((const double *)__a); + return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b, + 0, 1, 0, 1); } -/// \brief Loads the data from a 128-bit vector of [4 x float] from the +/// Loads the data from a 128-bit vector of [4 x float] from the /// specified address pointed to by \a __a and broadcasts it to 128-bit /// elements in a 256-bit vector of [8 x float]. /// @@ -3129,11 +3059,13 @@ _mm256_broadcast_pd(__m128d const *__a) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps(__m128 const *__a) { - return (__m256)__builtin_ia32_vbroadcastf128_ps256((__v4sf const *)__a); + __m128 __b = _mm_loadu_ps((const float *)__a); + return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b, + 0, 1, 2, 3, 0, 1, 2, 3); } /* SIMD load ops */ -/// \brief Loads 4 double-precision floating point values from a 32-byte aligned +/// Loads 4 double-precision floating point values from a 32-byte aligned /// memory location pointed to by \a __p into a vector of [4 x double]. /// /// \headerfile @@ -3150,7 +3082,7 @@ _mm256_load_pd(double const *__p) return *(__m256d *)__p; } -/// \brief Loads 8 single-precision floating point values from a 32-byte aligned +/// Loads 8 single-precision floating point values from a 32-byte aligned /// memory location pointed to by \a __p into a vector of [8 x float]. /// /// \headerfile @@ -3166,7 +3098,7 @@ _mm256_load_ps(float const *__p) return *(__m256 *)__p; } -/// \brief Loads 4 double-precision floating point values from an unaligned +/// Loads 4 double-precision floating point values from an unaligned /// memory location pointed to by \a __p into a vector of [4 x double]. /// /// \headerfile @@ -3186,7 +3118,7 @@ _mm256_loadu_pd(double const *__p) return ((struct __loadu_pd*)__p)->__v; } -/// \brief Loads 8 single-precision floating point values from an unaligned +/// Loads 8 single-precision floating point values from an unaligned /// memory location pointed to by \a __p into a vector of [8 x float]. /// /// \headerfile @@ -3206,7 +3138,7 @@ _mm256_loadu_ps(float const *__p) return ((struct __loadu_ps*)__p)->__v; } -/// \brief Loads 256 bits of integer data from a 32-byte aligned memory +/// Loads 256 bits of integer data from a 32-byte aligned memory /// location pointed to by \a __p into elements of a 256-bit integer vector. /// /// \headerfile @@ -3223,7 +3155,7 @@ _mm256_load_si256(__m256i const *__p) return *__p; } -/// \brief Loads 256 bits of integer data from an unaligned memory location +/// Loads 256 bits of integer data from an unaligned memory location /// pointed to by \a __p into a 256-bit integer vector. 
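The load intrinsics above differ only in their alignment contract: `_mm256_load_pd` requires a 32-byte aligned address, while `_mm256_loadu_pd` tolerates any alignment (via the packed-struct load idiom in the header). A usage sketch with assumed buffers (compile with `-mavx`):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    double buf[8];
    for (int i = 0; i < 8; ++i) buf[i] = i;
    __m256d u = _mm256_loadu_pd(buf + 1); /* may be misaligned: fine */

    double abuf[4] __attribute__((aligned(32))) = {0.0, 1.0, 2.0, 3.0};
    __m256d a = _mm256_load_pd(abuf);     /* address must be 32B-aligned */

    double out[4];
    _mm256_storeu_pd(out, _mm256_add_pd(u, a));
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 3 5 7 */
    return 0;
}
```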
/// /// \headerfile @@ -3242,7 +3174,7 @@ _mm256_loadu_si256(__m256i const *__p) return ((struct __loadu_si256*)__p)->__v; } -/// \brief Loads 256 bits of integer data from an unaligned memory location +/// Loads 256 bits of integer data from an unaligned memory location /// pointed to by \a __p into a 256-bit integer vector. This intrinsic may /// perform better than \c _mm256_loadu_si256 when the data crosses a cache /// line boundary. @@ -3261,7 +3193,7 @@ _mm256_lddqu_si256(__m256i const *__p) } /* SIMD store ops */ -/// \brief Stores double-precision floating point values from a 256-bit vector +/// Stores double-precision floating point values from a 256-bit vector /// of [4 x double] to a 32-byte aligned memory location pointed to by /// \a __p. /// @@ -3280,7 +3212,7 @@ _mm256_store_pd(double *__p, __m256d __a) *(__m256d *)__p = __a; } -/// \brief Stores single-precision floating point values from a 256-bit vector +/// Stores single-precision floating point values from a 256-bit vector /// of [8 x float] to a 32-byte aligned memory location pointed to by \a __p. /// /// \headerfile @@ -3298,7 +3230,7 @@ _mm256_store_ps(float *__p, __m256 __a) *(__m256 *)__p = __a; } -/// \brief Stores double-precision floating point values from a 256-bit vector +/// Stores double-precision floating point values from a 256-bit vector /// of [4 x double] to an unaligned memory location pointed to by \a __p. /// /// \headerfile @@ -3319,7 +3251,7 @@ _mm256_storeu_pd(double *__p, __m256d __a) ((struct __storeu_pd*)__p)->__v = __a; } -/// \brief Stores single-precision floating point values from a 256-bit vector +/// Stores single-precision floating point values from a 256-bit vector /// of [8 x float] to an unaligned memory location pointed to by \a __p. /// /// \headerfile @@ -3339,7 +3271,7 @@ _mm256_storeu_ps(float *__p, __m256 __a) ((struct __storeu_ps*)__p)->__v = __a; } -/// \brief Stores integer values from a 256-bit integer vector to a 32-byte +/// Stores integer values from a 256-bit integer vector to a 32-byte /// aligned memory location pointed to by \a __p. /// /// \headerfile @@ -3357,7 +3289,7 @@ _mm256_store_si256(__m256i *__p, __m256i __a) *__p = __a; } -/// \brief Stores integer values from a 256-bit integer vector to an unaligned +/// Stores integer values from a 256-bit integer vector to an unaligned /// memory location pointed to by \a __p. /// /// \headerfile @@ -3378,7 +3310,7 @@ _mm256_storeu_si256(__m256i *__p, __m256i __a) } /* Conditional load ops */ -/// \brief Conditionally loads double-precision floating point elements from a +/// Conditionally loads double-precision floating point elements from a /// memory location pointed to by \a __p into a 128-bit vector of /// [2 x double], depending on the mask bits associated with each data /// element. @@ -3396,13 +3328,13 @@ _mm256_storeu_si256(__m256i *__p, __m256i __a) /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [2 x double] containing the loaded values. 
-static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_maskload_pd(double const *__p, __m128i __m) { return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m); } -/// \brief Conditionally loads double-precision floating point elements from a +/// Conditionally loads double-precision floating point elements from a /// memory location pointed to by \a __p into a 256-bit vector of /// [4 x double], depending on the mask bits associated with each data /// element. @@ -3427,7 +3359,7 @@ _mm256_maskload_pd(double const *__p, __m256i __m) (__v4di)__m); } -/// \brief Conditionally loads single-precision floating point elements from a +/// Conditionally loads single-precision floating point elements from a /// memory location pointed to by \a __p into a 128-bit vector of /// [4 x float], depending on the mask bits associated with each data /// element. @@ -3445,13 +3377,13 @@ _mm256_maskload_pd(double const *__p, __m256i __m) /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [4 x float] containing the loaded values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_maskload_ps(float const *__p, __m128i __m) { return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m); } -/// \brief Conditionally loads single-precision floating point elements from a +/// Conditionally loads single-precision floating point elements from a /// memory location pointed to by \a __p into a 256-bit vector of /// [8 x float], depending on the mask bits associated with each data /// element. @@ -3476,7 +3408,7 @@ _mm256_maskload_ps(float const *__p, __m256i __m) } /* Conditional store ops */ -/// \brief Moves single-precision floating point values from a 256-bit vector +/// Moves single-precision floating point values from a 256-bit vector /// of [8 x float] to a memory location pointed to by \a __p, according to /// the specified mask. /// @@ -3500,7 +3432,7 @@ _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a) __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a); } -/// \brief Moves double-precision values from a 128-bit vector of [2 x double] +/// Moves double-precision values from a 128-bit vector of [2 x double] /// to a memory location pointed to by \a __p, according to the specified /// mask. /// @@ -3518,13 +3450,13 @@ _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a) /// changed. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be stored. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a) { __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a); } -/// \brief Moves double-precision values from a 256-bit vector of [4 x double] +/// Moves double-precision values from a 256-bit vector of [4 x double] /// to a memory location pointed to by \a __p, according to the specified /// mask. /// @@ -3548,7 +3480,7 @@ _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a) __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a); } -/// \brief Moves single-precision floating point values from a 128-bit vector +/// Moves single-precision floating point values from a 128-bit vector /// of [4 x float] to a memory location pointed to by \a __p, according to /// the specified mask. 
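The mask convention for these conditional loads and stores is the sign (most significant) bit of each mask element; a small sketch under the same `-mavx` assumption:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    double src[2] = {1.0, 2.0};

    /* A lane participates only when the matching mask element has its
       sign bit set; masked-off lanes read as zero / are left untouched. */
    __m128i mask = _mm_set_epi64x(0, -1);    /* select lane 0 only */
    __m128d v = _mm_maskload_pd(src, mask);  /* v = {1.0, 0.0} */

    double dst[2] = {9.0, 9.0};
    _mm_maskstore_pd(dst, mask, v);          /* writes lane 0 only */
    printf("%g %g\n", dst[0], dst[1]);       /* 1 9 */
    return 0;
}
```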
/// @@ -3566,14 +3498,14 @@ _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a) /// changed. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) { __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a); } /* Cacheability support ops */ -/// \brief Moves integer data from a 256-bit integer vector to a 32-byte +/// Moves integer data from a 256-bit integer vector to a 32-byte /// aligned memory location. To minimize caching, the data is flagged as /// non-temporal (unlikely to be used again soon). /// @@ -3593,7 +3525,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); } -/// \brief Moves double-precision values from a 256-bit vector of [4 x double] +/// Moves double-precision values from a 256-bit vector of [4 x double] /// to a 32-byte aligned memory location. To minimize caching, the data is /// flagged as non-temporal (unlikely to be used again soon). /// @@ -3613,7 +3545,7 @@ _mm256_stream_pd(double *__a, __m256d __b) __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); } -/// \brief Moves single-precision floating point values from a 256-bit vector +/// Moves single-precision floating point values from a 256-bit vector /// of [8 x float] to a 32-byte aligned memory location. To minimize /// caching, the data is flagged as non-temporal (unlikely to be used again /// soon). @@ -3635,7 +3567,7 @@ _mm256_stream_ps(float *__p, __m256 __a) } /* Create vectors */ -/// \brief Create a 256-bit vector of [4 x double] with undefined values. +/// Create a 256-bit vector of [4 x double] with undefined values. /// /// \headerfile /// @@ -3648,7 +3580,7 @@ _mm256_undefined_pd(void) return (__m256d)__builtin_ia32_undef256(); } -/// \brief Create a 256-bit vector of [8 x float] with undefined values. +/// Create a 256-bit vector of [8 x float] with undefined values. /// /// \headerfile /// @@ -3661,7 +3593,7 @@ _mm256_undefined_ps(void) return (__m256)__builtin_ia32_undef256(); } -/// \brief Create a 256-bit integer vector with undefined values. +/// Create a 256-bit integer vector with undefined values. /// /// \headerfile /// @@ -3674,7 +3606,7 @@ _mm256_undefined_si256(void) return (__m256i)__builtin_ia32_undef256(); } -/// \brief Constructs a 256-bit floating-point vector of [4 x double] +/// Constructs a 256-bit floating-point vector of [4 x double] /// initialized with the specified double-precision floating-point values. /// /// \headerfile @@ -3698,10 +3630,10 @@ _mm256_undefined_si256(void) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd(double __a, double __b, double __c, double __d) { - return (__m256d){ __d, __c, __b, __a }; + return __extension__ (__m256d){ __d, __c, __b, __a }; } -/// \brief Constructs a 256-bit floating-point vector of [8 x float] initialized +/// Constructs a 256-bit floating-point vector of [8 x float] initialized /// with the specified single-precision floating-point values. 
/// /// \headerfile @@ -3738,10 +3670,10 @@ static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { - return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; + return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; } -/// \brief Constructs a 256-bit integer vector initialized with the specified +/// Constructs a 256-bit integer vector initialized with the specified /// 32-bit integral values. /// /// \headerfile @@ -3770,10 +3702,10 @@ static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { - return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; + return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; } -/// \brief Constructs a 256-bit integer vector initialized with the specified +/// Constructs a 256-bit integer vector initialized with the specified /// 16-bit integral values. /// /// \headerfile @@ -3820,11 +3752,11 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) { - return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06, + return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06, __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 }; } -/// \brief Constructs a 256-bit integer vector initialized with the specified +/// Constructs a 256-bit integer vector initialized with the specified /// 8-bit integral values. /// /// \headerfile @@ -3907,7 +3839,7 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) { - return (__m256i)(__v32qi){ + return __extension__ (__m256i)(__v32qi){ __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, @@ -3915,7 +3847,7 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, }; } -/// \brief Constructs a 256-bit integer vector initialized with the specified +/// Constructs a 256-bit integer vector initialized with the specified /// 64-bit integral values. /// /// \headerfile @@ -3935,11 +3867,11 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) { - return (__m256i)(__v4di){ __d, __c, __b, __a }; + return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a }; } /* Create vectors with elements in reverse order */ -/// \brief Constructs a 256-bit floating-point vector of [4 x double], +/// Constructs a 256-bit floating-point vector of [4 x double], /// initialized in reverse order with the specified double-precision /// floating-point values. /// @@ -3964,10 +3896,10 @@ _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_pd(double __a, double __b, double __c, double __d) { - return (__m256d){ __a, __b, __c, __d }; + return _mm256_set_pd(__d, __c, __b, __a); } -/// \brief Constructs a 256-bit floating-point vector of [8 x float], +/// Constructs a 256-bit floating-point vector of [8 x float], /// initialized in reverse order with the specified single-precision /// float-point values. 
/// @@ -4005,10 +3937,10 @@ static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { - return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h }; + return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a); } -/// \brief Constructs a 256-bit integer vector, initialized in reverse order +/// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 32-bit integral values. /// /// \headerfile @@ -4037,10 +3969,10 @@ static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { - return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 }; + return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0); } -/// \brief Constructs a 256-bit integer vector, initialized in reverse order +/// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 16-bit integral values. /// /// \headerfile @@ -4087,11 +4019,13 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) { - return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09, - __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 }; + return _mm256_set_epi16(__w00, __w01, __w02, __w03, + __w04, __w05, __w06, __w07, + __w08, __w09, __w10, __w11, + __w12, __w13, __w14, __w15); } -/// \brief Constructs a 256-bit integer vector, initialized in reverse order +/// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 8-bit integral values. /// /// \headerfile @@ -4174,14 +4108,13 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) { - return (__m256i)(__v32qi){ - __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24, - __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16, - __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08, - __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 }; + return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, + __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, + __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, + __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31); } -/// \brief Constructs a 256-bit integer vector, initialized in reverse order +/// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 64-bit integral values. /// /// \headerfile @@ -4201,11 +4134,11 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) { - return (__m256i)(__v4di){ __a, __b, __c, __d }; + return _mm256_set_epi64x(__d, __c, __b, __a); } /* Create vectors with repeated elements */ -/// \brief Constructs a 256-bit floating-point vector of [4 x double], with each +/// Constructs a 256-bit floating-point vector of [4 x double], with each /// of the four double-precision floating-point vector elements set to the /// specified double-precision floating-point value. 
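The hunks above rewrite the `_mm256_setr_*` family as calls to `_mm256_set_*` with the argument order reversed, which is exactly their documented relationship; a sketch showing that both spell the same memory-order vector:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* set_pd takes elements from the highest lane down;
       setr_pd ("reverse") takes them in memory order. */
    __m256d a = _mm256_set_pd(3.0, 2.0, 1.0, 0.0);
    __m256d b = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);

    double out[4];
    _mm256_storeu_pd(out, a);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 1 2 3 */
    _mm256_storeu_pd(out, b);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 1 2 3 */
    return 0;
}
```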
/// @@ -4220,10 +4153,10 @@ _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set1_pd(double __w) { - return (__m256d){ __w, __w, __w, __w }; + return _mm256_set_pd(__w, __w, __w, __w); } -/// \brief Constructs a 256-bit floating-point vector of [8 x float], with each +/// Constructs a 256-bit floating-point vector of [8 x float], with each /// of the eight single-precision floating-point vector elements set to the /// specified single-precision floating-point value. /// @@ -4239,10 +4172,10 @@ _mm256_set1_pd(double __w) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set1_ps(float __w) { - return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w }; + return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w); } -/// \brief Constructs a 256-bit integer vector of [8 x i32], with each of the +/// Constructs a 256-bit integer vector of [8 x i32], with each of the /// 32-bit integral vector elements set to the specified 32-bit integral /// value. /// @@ -4258,10 +4191,10 @@ _mm256_set1_ps(float __w) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i) { - return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i }; + return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i); } -/// \brief Constructs a 256-bit integer vector of [16 x i16], with each of the +/// Constructs a 256-bit integer vector of [16 x i16], with each of the /// 16-bit integral vector elements set to the specified 16-bit integral /// value. /// @@ -4276,11 +4209,11 @@ _mm256_set1_epi32(int __i) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16(short __w) { - return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w }; + return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w); } -/// \brief Constructs a 256-bit integer vector of [32 x i8], with each of the +/// Constructs a 256-bit integer vector of [32 x i8], with each of the /// 8-bit integral vector elements set to the specified 8-bit integral value. /// /// \headerfile @@ -4294,12 +4227,13 @@ _mm256_set1_epi16(short __w) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8(char __b) { - return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, - __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, - __b, __b, __b, __b, __b, __b, __b }; + return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b); } -/// \brief Constructs a 256-bit integer vector of [4 x i64], with each of the +/// Constructs a 256-bit integer vector of [4 x i64], with each of the /// 64-bit integral vector elements set to the specified 64-bit integral /// value. /// @@ -4314,11 +4248,11 @@ _mm256_set1_epi8(char __b) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q) { - return (__m256i)(__v4di){ __q, __q, __q, __q }; + return _mm256_set_epi64x(__q, __q, __q, __q); } /* Create __zeroed vectors */ -/// \brief Constructs a 256-bit floating-point vector of [4 x double] with all +/// Constructs a 256-bit floating-point vector of [4 x double] with all /// vector elements initialized to zero. 
/// /// \headerfile @@ -4329,10 +4263,10 @@ _mm256_set1_epi64x(long long __q) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void) { - return (__m256d){ 0, 0, 0, 0 }; + return __extension__ (__m256d){ 0, 0, 0, 0 }; } -/// \brief Constructs a 256-bit floating-point vector of [8 x float] with all +/// Constructs a 256-bit floating-point vector of [8 x float] with all /// vector elements initialized to zero. /// /// \headerfile @@ -4343,10 +4277,10 @@ _mm256_setzero_pd(void) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void) { - return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 }; + return __extension__ (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 }; } -/// \brief Constructs a 256-bit integer vector initialized to zero. +/// Constructs a 256-bit integer vector initialized to zero. /// /// \headerfile /// @@ -4356,11 +4290,11 @@ _mm256_setzero_ps(void) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void) { - return (__m256i){ 0LL, 0LL, 0LL, 0LL }; + return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 }; } /* Cast between vector types */ -/// \brief Casts a 256-bit floating-point vector of [4 x double] into a 256-bit +/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit /// floating-point vector of [8 x float]. /// /// \headerfile @@ -4377,7 +4311,7 @@ _mm256_castpd_ps(__m256d __a) return (__m256)__a; } -/// \brief Casts a 256-bit floating-point vector of [4 x double] into a 256-bit +/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit /// integer vector. /// /// \headerfile @@ -4394,7 +4328,7 @@ _mm256_castpd_si256(__m256d __a) return (__m256i)__a; } -/// \brief Casts a 256-bit floating-point vector of [8 x float] into a 256-bit +/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit /// floating-point vector of [4 x double]. /// /// \headerfile @@ -4411,7 +4345,7 @@ _mm256_castps_pd(__m256 __a) return (__m256d)__a; } -/// \brief Casts a 256-bit floating-point vector of [8 x float] into a 256-bit +/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit /// integer vector. /// /// \headerfile @@ -4428,7 +4362,7 @@ _mm256_castps_si256(__m256 __a) return (__m256i)__a; } -/// \brief Casts a 256-bit integer vector into a 256-bit floating-point vector +/// Casts a 256-bit integer vector into a 256-bit floating-point vector /// of [8 x float]. /// /// \headerfile @@ -4445,7 +4379,7 @@ _mm256_castsi256_ps(__m256i __a) return (__m256)__a; } -/// \brief Casts a 256-bit integer vector into a 256-bit floating-point vector +/// Casts a 256-bit integer vector into a 256-bit floating-point vector /// of [4 x double]. /// /// \headerfile @@ -4462,7 +4396,7 @@ _mm256_castsi256_pd(__m256i __a) return (__m256d)__a; } -/// \brief Returns the lower 128 bits of a 256-bit floating-point vector of +/// Returns the lower 128 bits of a 256-bit floating-point vector of /// [4 x double] as a 128-bit floating-point vector of [2 x double]. /// /// \headerfile @@ -4479,7 +4413,7 @@ _mm256_castpd256_pd128(__m256d __a) return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1); } -/// \brief Returns the lower 128 bits of a 256-bit floating-point vector of +/// Returns the lower 128 bits of a 256-bit floating-point vector of /// [8 x float] as a 128-bit floating-point vector of [4 x float]. /// /// \headerfile @@ -4496,7 +4430,7 @@ _mm256_castps256_ps128(__m256 __a) return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3); } -/// \brief Truncates a 256-bit integer vector into a 128-bit integer vector. 
+/// Truncates a 256-bit integer vector into a 128-bit integer vector. /// /// \headerfile /// @@ -4512,7 +4446,7 @@ _mm256_castsi256_si128(__m256i __a) return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1); } -/// \brief Constructs a 256-bit floating-point vector of [4 x double] from a +/// Constructs a 256-bit floating-point vector of [4 x double] from a /// 128-bit floating-point vector of [2 x double]. /// /// The lower 128 bits contain the value of the source vector. The contents @@ -4533,7 +4467,7 @@ _mm256_castpd128_pd256(__m128d __a) return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1); } -/// \brief Constructs a 256-bit floating-point vector of [8 x float] from a +/// Constructs a 256-bit floating-point vector of [8 x float] from a /// 128-bit floating-point vector of [4 x float]. /// /// The lower 128 bits contain the value of the source vector. The contents @@ -4554,7 +4488,7 @@ _mm256_castps128_ps256(__m128 __a) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1); } -/// \brief Constructs a 256-bit integer vector from a 128-bit integer vector. +/// Constructs a 256-bit integer vector from a 128-bit integer vector. /// /// The lower 128 bits contain the value of the source vector. The contents /// of the upper 128 bits are undefined. @@ -4573,7 +4507,7 @@ _mm256_castsi128_si256(__m128i __a) return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1); } -/// \brief Constructs a 256-bit floating-point vector of [4 x double] from a +/// Constructs a 256-bit floating-point vector of [4 x double] from a /// 128-bit floating-point vector of [2 x double]. The lower 128 bits /// contain the value of the source vector. The upper 128 bits are set /// to zero. @@ -4592,7 +4526,7 @@ _mm256_zextpd128_pd256(__m128d __a) return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3); } -/// \brief Constructs a 256-bit floating-point vector of [8 x float] from a +/// Constructs a 256-bit floating-point vector of [8 x float] from a /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain /// the value of the source vector. The upper 128 bits are set to zero. /// @@ -4610,7 +4544,7 @@ _mm256_zextps128_ps256(__m128 __a) return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7); } -/// \brief Constructs a 256-bit integer vector from a 128-bit integer vector. +/// Constructs a 256-bit integer vector from a 128-bit integer vector. /// The lower 128 bits contain the value of the source vector. The upper /// 128 bits are set to zero. /// @@ -4633,7 +4567,7 @@ _mm256_zextsi128_si256(__m128i __a) We use macros rather than inlines because we only want to accept invocations where the immediate M is a constant expression. */ -/// \brief Constructs a new 256-bit vector of [8 x float] by first duplicating +/// Constructs a new 256-bit vector of [8 x float] by first duplicating /// a 256-bit vector of [8 x float] given in the first parameter, and then /// replacing either the upper or the lower 128 bits with the contents of a /// 128-bit vector of [4 x float] in the second parameter. @@ -4667,20 +4601,11 @@ _mm256_zextsi128_si256(__m128i __a) /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. 
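The distinction drawn above between the `cast` and `zext` 128-to-256 conversions is that only the latter guarantees zeroed upper bits; a sketch, still assuming `-mavx`:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 lo = _mm_setr_ps(1, 2, 3, 4);

    /* _mm256_zextps128_ps256: upper 128 bits are guaranteed zero.
       _mm256_castps128_ps256: upper 128 bits are undefined, so it is
       only safe when they will be overwritten or ignored. */
    __m256 z = _mm256_zextps128_ps256(lo);

    float out[8];
    _mm256_storeu_ps(out, z);
    for (int i = 0; i < 8; ++i)
        printf("%g ", out[i]);      /* 1 2 3 4 0 0 0 0 */
    printf("\n");
    return 0;
}
```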
-#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \ - (__m256)__builtin_shufflevector( \ - (__v8sf)(__m256)(V1), \ - (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \ - (((M) & 1) ? 0 : 8), \ - (((M) & 1) ? 1 : 9), \ - (((M) & 1) ? 2 : 10), \ - (((M) & 1) ? 3 : 11), \ - (((M) & 1) ? 8 : 4), \ - (((M) & 1) ? 9 : 5), \ - (((M) & 1) ? 10 : 6), \ - (((M) & 1) ? 11 : 7) );}) - -/// \brief Constructs a new 256-bit vector of [4 x double] by first duplicating +#define _mm256_insertf128_ps(V1, V2, M) \ + (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \ + (__v4sf)(__m128)(V2), (int)(M)) + +/// Constructs a new 256-bit vector of [4 x double] by first duplicating /// a 256-bit vector of [4 x double] given in the first parameter, and then /// replacing either the upper or the lower 128 bits with the contents of a /// 128-bit vector of [2 x double] in the second parameter. @@ -4714,16 +4639,11 @@ _mm256_zextsi128_si256(__m128i __a) /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit vector of [4 x double] containing the interleaved values. -#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \ - (__m256d)__builtin_shufflevector( \ - (__v4df)(__m256d)(V1), \ - (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \ - (((M) & 1) ? 0 : 4), \ - (((M) & 1) ? 1 : 5), \ - (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) );}) - -/// \brief Constructs a new 256-bit integer vector by first duplicating a +#define _mm256_insertf128_pd(V1, V2, M) \ + (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \ + (__v2df)(__m128d)(V2), (int)(M)) + +/// Constructs a new 256-bit integer vector by first duplicating a /// 256-bit integer vector given in the first parameter, and then replacing /// either the upper or the lower 128 bits with the contents of a 128-bit /// integer vector in the second parameter. @@ -4757,21 +4677,16 @@ _mm256_zextsi128_si256(__m128i __a) /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit integer vector containing the interleaved values. -#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \ - (__m256i)__builtin_shufflevector( \ - (__v4di)(__m256i)(V1), \ - (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ - (((M) & 1) ? 0 : 4), \ - (((M) & 1) ? 1 : 5), \ - (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) );}) +#define _mm256_insertf128_si256(V1, V2, M) \ + (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \ + (__v4si)(__m128i)(V2), (int)(M)) /* Vector extract. We use macros rather than inlines because we only want to accept invocations where the immediate M is a constant expression. */ -/// \brief Extracts either the upper or the lower 128 bits from a 256-bit vector +/// Extracts either the upper or the lower 128 bits from a 256-bit vector /// of [8 x float], as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit vector of [4 x float]. /// @@ -4792,16 +4707,10 @@ _mm256_zextsi128_si256(__m128i __a) /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [4 x float] containing the extracted bits. -#define _mm256_extractf128_ps(V, M) __extension__ ({ \ - (__m128)__builtin_shufflevector( \ - (__v8sf)(__m256)(V), \ - (__v8sf)(_mm256_undefined_ps()), \ - (((M) & 1) ? 4 : 0), \ - (((M) & 1) ? 5 : 1), \ - (((M) & 1) ? 6 : 2), \ - (((M) & 1) ? 
7 : 3) );}) - -/// \brief Extracts either the upper or the lower 128 bits from a 256-bit vector +#define _mm256_extractf128_ps(V, M) \ + (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M)) + +/// Extracts either the upper or the lower 128 bits from a 256-bit vector /// of [4 x double], as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit vector of [2 x double]. /// @@ -4822,14 +4731,10 @@ _mm256_zextsi128_si256(__m128i __a) /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [2 x double] containing the extracted bits. -#define _mm256_extractf128_pd(V, M) __extension__ ({ \ - (__m128d)__builtin_shufflevector( \ - (__v4df)(__m256d)(V), \ - (__v4df)(_mm256_undefined_pd()), \ - (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 3 : 1) );}) - -/// \brief Extracts either the upper or the lower 128 bits from a 256-bit +#define _mm256_extractf128_pd(V, M) \ + (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M)) + +/// Extracts either the upper or the lower 128 bits from a 256-bit /// integer vector, as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit integer vector. /// @@ -4850,15 +4755,11 @@ _mm256_zextsi128_si256(__m128i __a) /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit integer vector containing the extracted bits. -#define _mm256_extractf128_si256(V, M) __extension__ ({ \ - (__m128i)__builtin_shufflevector( \ - (__v4di)(__m256i)(V), \ - (__v4di)(_mm256_undefined_si256()), \ - (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 3 : 1) );}) +#define _mm256_extractf128_si256(V, M) \ + (__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M)) /* SIMD load ops (unaligned) */ -/// \brief Loads two 128-bit floating-point vectors of [4 x float] from +/// Loads two 128-bit floating-point vectors of [4 x float] from /// unaligned memory locations and constructs a 256-bit floating-point vector /// of [8 x float] by concatenating the two 128-bit vectors. /// @@ -4886,7 +4787,7 @@ _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo) return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1); } -/// \brief Loads two 128-bit floating-point vectors of [2 x double] from +/// Loads two 128-bit floating-point vectors of [2 x double] from /// unaligned memory locations and constructs a 256-bit floating-point vector /// of [4 x double] by concatenating the two 128-bit vectors. /// @@ -4914,7 +4815,7 @@ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1); } -/// \brief Loads two 128-bit integer vectors from unaligned memory locations and +/// Loads two 128-bit integer vectors from unaligned memory locations and /// constructs a 256-bit integer vector by concatenating the two 128-bit /// vectors. /// @@ -4940,7 +4841,7 @@ _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) } /* SIMD store ops (unaligned) */ -/// \brief Stores the upper and lower 128 bits of a 256-bit floating-point +/// Stores the upper and lower 128 bits of a 256-bit floating-point /// vector of [8 x float] into two different unaligned memory locations. 
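As the comment in the diff notes, the insert/extract operations above stay macros because the immediate `M` must be a constant expression; a usage sketch:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 v = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);

    /* The selector argument picks a 128-bit half and must be a
       compile-time constant. */
    __m128 hi = _mm256_extractf128_ps(v, 1);    /* lanes 4..7 */
    __m256 w  = _mm256_insertf128_ps(v, hi, 0); /* duplicate them low */

    float out[8];
    _mm256_storeu_ps(out, w);
    for (int i = 0; i < 8; ++i)
        printf("%g ", out[i]);      /* 4 5 6 7 4 5 6 7 */
    printf("\n");
    return 0;
}
```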
/// /// \headerfile @@ -4969,7 +4870,7 @@ _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a) _mm_storeu_ps(__addr_hi, __v128); } -/// \brief Stores the upper and lower 128 bits of a 256-bit floating-point +/// Stores the upper and lower 128 bits of a 256-bit floating-point /// vector of [4 x double] into two different unaligned memory locations. /// /// \headerfile @@ -4998,7 +4899,7 @@ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) _mm_storeu_pd(__addr_hi, __v128); } -/// \brief Stores the upper and lower 128 bits of a 256-bit integer vector into +/// Stores the upper and lower 128 bits of a 256-bit integer vector into /// two different unaligned memory locations. /// /// \headerfile @@ -5027,7 +4928,7 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) _mm_storeu_si128(__addr_hi, __v128); } -/// \brief Constructs a 256-bit floating-point vector of [8 x float] by +/// Constructs a 256-bit floating-point vector of [8 x float] by /// concatenating two 128-bit floating-point vectors of [4 x float]. /// /// \headerfile @@ -5048,7 +4949,7 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo) return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7); } -/// \brief Constructs a 256-bit floating-point vector of [4 x double] by +/// Constructs a 256-bit floating-point vector of [4 x double] by /// concatenating two 128-bit floating-point vectors of [2 x double]. /// /// \headerfile @@ -5066,10 +4967,10 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_m128d (__m128d __hi, __m128d __lo) { - return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo); + return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3); } -/// \brief Constructs a 256-bit integer vector by concatenating two 128-bit +/// Constructs a 256-bit integer vector by concatenating two 128-bit /// integer vectors. /// /// \headerfile @@ -5086,10 +4987,10 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_m128i (__m128i __hi, __m128i __lo) { - return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo); + return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3); } -/// \brief Constructs a 256-bit floating-point vector of [8 x float] by +/// Constructs a 256-bit floating-point vector of [8 x float] by /// concatenating two 128-bit floating-point vectors of [4 x float]. This is /// similar to _mm256_set_m128, but the order of the input parameters is /// swapped. @@ -5112,7 +5013,7 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi) return _mm256_set_m128(__hi, __lo); } -/// \brief Constructs a 256-bit floating-point vector of [4 x double] by +/// Constructs a 256-bit floating-point vector of [4 x double] by /// concatenating two 128-bit floating-point vectors of [2 x double]. This is /// similar to _mm256_set_m128d, but the order of the input parameters is /// swapped. @@ -5132,10 +5033,10 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_m128d (__m128d __lo, __m128d __hi) { - return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo); + return (__m256d)_mm256_set_m128d(__hi, __lo); } -/// \brief Constructs a 256-bit integer vector by concatenating two 128-bit +/// Constructs a 256-bit integer vector by concatenating two 128-bit /// integer vectors. This is similar to _mm256_set_m128i, but the order of /// the input parameters is swapped. 
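`_mm256_set_m128d` and `_mm256_setr_m128d` differ only in parameter order, which the rewritten bodies above now make explicit; a sketch:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d lo = _mm_set1_pd(1.0);
    __m128d hi = _mm_set1_pd(2.0);

    /* set_m128d takes (hi, lo); setr_m128d takes (lo, hi). */
    __m256d a = _mm256_set_m128d(hi, lo);
    __m256d b = _mm256_setr_m128d(lo, hi);

    double out[4];
    _mm256_storeu_pd(out, a);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 1 2 2 */
    _mm256_storeu_pd(out, b);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 1 2 2 */
    return 0;
}
```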
/// @@ -5153,9 +5054,10 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_m128i (__m128i __lo, __m128i __hi) { - return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo); + return (__m256i)_mm256_set_m128i(__hi, __lo); } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 #endif /* __AVXINTRIN_H */ diff --git a/lib/clang/7.0.0/include/bmi2intrin.h b/lib/clang/8.0.0/include/bmi2intrin.h similarity index 100% rename from lib/clang/7.0.0/include/bmi2intrin.h rename to lib/clang/8.0.0/include/bmi2intrin.h diff --git a/lib/clang/7.0.0/include/bmiintrin.h b/lib/clang/8.0.0/include/bmiintrin.h similarity index 89% rename from lib/clang/7.0.0/include/bmiintrin.h rename to lib/clang/8.0.0/include/bmiintrin.h index e812a16..56c20b7 100644 --- a/lib/clang/7.0.0/include/bmiintrin.h +++ b/lib/clang/8.0.0/include/bmiintrin.h @@ -49,7 +49,7 @@ to use it as a potentially faster version of BSF. */ #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) -/// \brief Counts the number of trailing zero bits in the operand. +/// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// @@ -62,10 +62,10 @@ static __inline__ unsigned short __RELAXED_FN_ATTRS __tzcnt_u16(unsigned short __X) { - return __X ? __builtin_ctzs(__X) : 16; + return __builtin_ia32_tzcnt_u16(__X); } -/// \brief Performs a bitwise AND of the second operand with the one's +/// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile @@ -85,7 +85,7 @@ __andn_u32(unsigned int __X, unsigned int __Y) } /* AMD-specified, double-leading-underscore version of BEXTR */ -/// \brief Extracts the specified bits from the first operand and returns them +/// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile @@ -100,6 +100,7 @@ __andn_u32(unsigned int __X, unsigned int __Y) /// number of bits to be extracted. /// \returns An unsigned integer whose least significant bits contain the /// extracted bits. +/// \see _bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __bextr_u32(unsigned int __X, unsigned int __Y) { @@ -107,7 +108,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y) } /* Intel-specified, single-leading-underscore version of BEXTR */ -/// \brief Extracts the specified bits from the first operand and returns them +/// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile @@ -124,13 +125,14 @@ __bextr_u32(unsigned int __X, unsigned int __Y) /// Bits [7:0] specify the number of bits. /// \returns An unsigned integer whose least significant bits contain the /// extracted bits. +/// \see __bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } -/// \brief Clears all bits in the source except for the least significant bit +/// Clears all bits in the source except for the least significant bit /// containing a value of 1 and returns the result. 
/// /// \headerfile @@ -147,7 +149,7 @@ __blsi_u32(unsigned int __X) return __X & -__X; } -/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and +/// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// @@ -164,7 +166,7 @@ __blsmsk_u32(unsigned int __X) return __X ^ (__X - 1); } -/// \brief Clears the least significant bit that is set to 1 in the source +/// Clears the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile @@ -181,7 +183,7 @@ __blsr_u32(unsigned int __X) return __X & (__X - 1); } -/// \brief Counts the number of trailing zero bits in the operand. +/// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// @@ -194,10 +196,10 @@ __blsr_u32(unsigned int __X) static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { - return __X ? __builtin_ctz(__X) : 32; + return __builtin_ia32_tzcnt_u32(__X); } -/// \brief Counts the number of trailing zero bits in the operand. +/// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// @@ -210,7 +212,7 @@ __tzcnt_u32(unsigned int __X) static __inline__ int __RELAXED_FN_ATTRS _mm_tzcnt_32(unsigned int __X) { - return __X ? __builtin_ctz(__X) : 32; + return __builtin_ia32_tzcnt_u32(__X); } #ifdef __x86_64__ @@ -226,7 +228,7 @@ _mm_tzcnt_32(unsigned int __X) #define _tzcnt_u64(a) (__tzcnt_u64((a))) -/// \brief Performs a bitwise AND of the second operand with the one's +/// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile @@ -246,7 +248,7 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y) } /* AMD-specified, double-leading-underscore version of BEXTR */ -/// \brief Extracts the specified bits from the first operand and returns them +/// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile @@ -261,6 +263,7 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y) /// the number of bits to be extracted. /// \returns An unsigned 64-bit integer whose least significant bits contain the /// extracted bits. +/// \see _bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __bextr_u64(unsigned long long __X, unsigned long long __Y) { @@ -268,7 +271,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y) } /* Intel-specified, single-leading-underscore version of BEXTR */ -/// \brief Extracts the specified bits from the first operand and returns them +/// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile @@ -285,13 +288,14 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y) /// Bits [7:0] specify the number of bits. /// \returns An unsigned 64-bit integer whose least significant bits contain the /// extracted bits. +/// \see __bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } -/// \brief Clears all bits in the source except for the least significant bit +/// Clears all bits in the source except for the least significant bit /// containing a value of 1 and returns the result. 
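The `__tzcnt_*` bodies above change from a `__builtin_ctz` ternary to the dedicated `tzcnt` builtin; both forms agree that a zero input yields the operand width. A sketch of these BMI helpers, assuming a `-mbmi` build:

```c
#include <x86intrin.h>
#include <stdio.h>

int main(void) {
    unsigned x = 0x28;                 /* 0b101000 */

    printf("%u\n", __tzcnt_u32(x));    /* 3: trailing zero bits */
    printf("%u\n", __tzcnt_u32(0));    /* 32: defined even for zero input */

    printf("%#x\n", __blsi_u32(x));    /* 0x8:  x & -x, isolate lowest set bit */
    printf("%#x\n", __blsmsk_u32(x));  /* 0xf:  x ^ (x-1), mask through lowest set bit */
    printf("%#x\n", __blsr_u32(x));    /* 0x20: x & (x-1), clear lowest set bit */
    return 0;
}
```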
/// /// \headerfile @@ -308,7 +312,7 @@ __blsi_u64(unsigned long long __X) return __X & -__X; } -/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and +/// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// @@ -325,7 +329,7 @@ __blsmsk_u64(unsigned long long __X) return __X ^ (__X - 1); } -/// \brief Clears the least significant bit that is set to 1 in the source +/// Clears the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile @@ -342,7 +346,7 @@ __blsr_u64(unsigned long long __X) return __X & (__X - 1); } -/// \brief Counts the number of trailing zero bits in the operand. +/// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// @@ -355,10 +359,10 @@ __blsr_u64(unsigned long long __X) static __inline__ unsigned long long __RELAXED_FN_ATTRS __tzcnt_u64(unsigned long long __X) { - return __X ? __builtin_ctzll(__X) : 64; + return __builtin_ia32_tzcnt_u64(__X); } -/// \brief Counts the number of trailing zero bits in the operand. +/// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// @@ -371,7 +375,7 @@ __tzcnt_u64(unsigned long long __X) static __inline__ long long __RELAXED_FN_ATTRS _mm_tzcnt_64(unsigned long long __X) { - return __X ? __builtin_ctzll(__X) : 64; + return __builtin_ia32_tzcnt_u64(__X); } #endif /* __x86_64__ */ diff --git a/lib/clang/7.0.0/include/cetintrin.h b/lib/clang/8.0.0/include/cetintrin.h similarity index 99% rename from lib/clang/7.0.0/include/cetintrin.h rename to lib/clang/8.0.0/include/cetintrin.h index b9e9801..120c954 100644 --- a/lib/clang/7.0.0/include/cetintrin.h +++ b/lib/clang/8.0.0/include/cetintrin.h @@ -1,4 +1,4 @@ -/*===---- cetintrin.h - CET intrinsic ------------------------------------=== +/*===---- cetintrin.h - CET intrinsic --------------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/lib/clang/8.0.0/include/cldemoteintrin.h b/lib/clang/8.0.0/include/cldemoteintrin.h new file mode 100644 index 0000000..fa78148 --- /dev/null +++ b/lib/clang/8.0.0/include/cldemoteintrin.h @@ -0,0 +1,42 @@ +/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
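The new `cldemoteintrin.h`, whose body continues below, wraps a single instruction; a minimal sketch assuming a `-mcldemote` build. Intel documents CLDEMOTE as hint-encoded, so it should execute as a NOP on parts without the feature, though that behavioral note is background rather than something this diff states:

```c
#include <immintrin.h>

int main(void) {
    static char line[64];
    /* Hint that this cache line may be demoted to a more distant cache
       level; purely a performance hint with no architectural effect. */
    _cldemote(line);
    return 0;
}
```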
+ * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __CLDEMOTEINTRIN_H +#define __CLDEMOTEINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("cldemote"))) + +static __inline__ void __DEFAULT_FN_ATTRS +_cldemote(const void * __P) { + __builtin_ia32_cldemote(__P); +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/clang/7.0.0/include/clflushoptintrin.h b/lib/clang/8.0.0/include/clflushoptintrin.h similarity index 98% rename from lib/clang/7.0.0/include/clflushoptintrin.h rename to lib/clang/8.0.0/include/clflushoptintrin.h index f1f1330..79bb458 100644 --- a/lib/clang/7.0.0/include/clflushoptintrin.h +++ b/lib/clang/8.0.0/include/clflushoptintrin.h @@ -1,4 +1,4 @@ -/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------------------=== +/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/lib/clang/7.0.0/include/clwbintrin.h b/lib/clang/8.0.0/include/clwbintrin.h similarity index 96% rename from lib/clang/7.0.0/include/clwbintrin.h rename to lib/clang/8.0.0/include/clwbintrin.h index 2594a6c..c09286b 100644 --- a/lib/clang/7.0.0/include/clwbintrin.h +++ b/lib/clang/8.0.0/include/clwbintrin.h @@ -31,7 +31,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clwb"))) -/// \brief Writes back to memory the cache line (if modified) that contains the +/// Writes back to memory the cache line (if modified) that contains the /// linear address specified in \a __p from any level of the cache hierarchy in /// the cache coherence domain /// diff --git a/lib/clang/7.0.0/include/clzerointrin.h b/lib/clang/8.0.0/include/clzerointrin.h similarity index 89% rename from lib/clang/7.0.0/include/clzerointrin.h rename to lib/clang/8.0.0/include/clzerointrin.h index ed7478f..f4e9208 100644 --- a/lib/clang/7.0.0/include/clzerointrin.h +++ b/lib/clang/8.0.0/include/clzerointrin.h @@ -20,18 +20,18 @@ * *===-----------------------------------------------------------------------=== */ -#ifndef __X86INTRIN_H +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif -#ifndef _CLZEROINTRIN_H -#define _CLZEROINTRIN_H +#ifndef __CLZEROINTRIN_H +#define __CLZEROINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("clzero"))) -/// \brief Loads the cache line address and zero's out the cacheline +/// Loads the cache line address and zero's out the cacheline /// /// \headerfile /// @@ -45,6 +45,6 @@ _mm_clzero (void * __line) __builtin_ia32_clzero ((void *)__line); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS -#endif /* _CLZEROINTRIN_H */ +#endif /* __CLZEROINTRIN_H */ diff --git a/lib/clang/7.0.0/include/cpuid.h b/lib/clang/8.0.0/include/cpuid.h similarity index 97% rename from lib/clang/7.0.0/include/cpuid.h rename to lib/clang/8.0.0/include/cpuid.h index 832cfb2..fce6af5 100644 --- a/lib/clang/7.0.0/include/cpuid.h +++ b/lib/clang/8.0.0/include/cpuid.h @@ -156,6 +156,7 @@ #define bit_SMEP 0x00000080 #define bit_BMI2 0x00000100 #define bit_ENH_MOVSB 0x00000200 +#define bit_INVPCID 0x00000400 #define bit_RTM 0x00000800 #define bit_MPX 0x00004000 #define bit_AVX512F 0x00010000 @@ -177,6 +178,7 @@ #define bit_AVX512VBMI 0x00000002 #define bit_PKU 0x00000004 #define bit_OSPKE 0x00000010 +#define bit_WAITPKG 0x00000020 #define bit_AVX512VBMI2 0x00000040 #define bit_SHSTK 0x00000080 #define bit_GFNI 0x00000100 @@ -186,10 +188,14 @@ #define bit_AVX512BITALG 0x00001000 #define bit_AVX512VPOPCNTDQ 0x00004000 #define bit_RDPID 0x00400000 +#define bit_CLDEMOTE 0x02000000 +#define bit_MOVDIRI 0x08000000 +#define bit_MOVDIR64B 0x10000000 /* Features in %edx for leaf 7 sub-leaf 0 */ #define bit_AVX5124VNNIW 0x00000004 #define bit_AVX5124FMAPS 0x00000008 +#define bit_PCONFIG 0x00040000 #define bit_IBT 0x00100000 /* Features in %eax for leaf 13 sub-leaf 1 */ @@ -197,6 +203,9 @@ #define bit_XSAVEC 0x00000002 #define bit_XSAVES 0x00000008 +/* Features in %eax for leaf 0x14 sub-leaf 0 */ +#define bit_PTWRITE 0x00000010 + /* Features in %ecx for leaf 0x80000001 */ #define bit_LAHF_LM 0x00000001 #define bit_ABM 0x00000020 diff --git a/lib/clang/7.0.0/include/cuda_wrappers/algorithm b/lib/clang/8.0.0/include/cuda_wrappers/algorithm similarity index 53% rename from lib/clang/7.0.0/include/cuda_wrappers/algorithm rename to lib/clang/8.0.0/include/cuda_wrappers/algorithm index cedd707..01af183 100644 --- a/lib/clang/7.0.0/include/cuda_wrappers/algorithm +++ b/lib/clang/8.0.0/include/cuda_wrappers/algorithm @@ -24,28 +24,36 @@ #ifndef __CLANG_CUDA_WRAPPERS_ALGORITHM #define __CLANG_CUDA_WRAPPERS_ALGORITHM -// This header defines __device__ overloads of std::min/max, but only if we're -// <= C++11. In C++14, these functions are constexpr, and so are implicitly -// __host__ __device__. +// This header defines __device__ overloads of std::min/max. // -// We don't support the initializer_list overloads because -// initializer_list::begin() and end() are not __host__ __device__ functions. +// Ideally we'd declare these functions only if we're <= C++11. In C++14, +// these functions are constexpr, and so are implicitly __host__ __device__. // -// When compiling in C++14 mode, we could force std::min/max to have different -// implementations for host and device, by declaring the device overloads -// before the constexpr overloads appear. We choose not to do this because - -// a) why write our own implementation when we can use one from the standard -// library? and -// b) libstdc++ is evil and declares min/max inside a header that is included -// *before* we include . So we'd have to unconditionally -// declare our __device__ overloads of min/max, but that would pollute -// things for people who choose not to include . 
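The `cpuid.h` hunk a little above adds feature bits for the new leaf-7 extensions; they pair with the header's `__get_cpuid_count` helper like so (a sketch, assuming that helper is available as in recent clang and gcc copies of `cpuid.h`):

```c
#include <cpuid.h>
#include <stdio.h>

int main(void) {
    unsigned eax, ebx, ecx, edx;

    /* Leaf 7, sub-leaf 0: the newly added bits live in %ecx. */
    if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        printf("WAITPKG:   %s\n", (ecx & bit_WAITPKG)   ? "yes" : "no");
        printf("CLDEMOTE:  %s\n", (ecx & bit_CLDEMOTE)  ? "yes" : "no");
        printf("MOVDIRI:   %s\n", (ecx & bit_MOVDIRI)   ? "yes" : "no");
        printf("MOVDIR64B: %s\n", (ecx & bit_MOVDIR64B) ? "yes" : "no");
    }
    return 0;
}
```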
+// However, the compiler being in C++14 mode does not imply that the standard +// library supports C++14. There is no macro we can test to check that the +// stdlib has constexpr std::min/max. Thus we have to unconditionally define +// our device overloads. +// +// A host+device function cannot be overloaded, and a constexpr function +// implicitly become host device if there's no explicitly host or device +// overload preceding it. So the simple thing to do would be to declare our +// device min/max overloads, and then #include_next . This way our +// device overloads would come first, and so if we have a C++14 stdlib, its +// min/max won't become host+device and conflict with our device overloads. +// +// But that also doesn't work. libstdc++ is evil and declares std::min/max in +// an internal header that is included *before* . Thus by the time +// we're inside of this file, std::min/max may already have been declared, and +// thus we can't prevent them from becoming host+device if they're constexpr. +// +// Therefore we perpetrate the following hack: We mark our __device__ overloads +// with __attribute__((enable_if(true, ""))). This causes the signature of the +// function to change without changing anything else about it. (Except that +// overload resolution will prefer it over the __host__ __device__ version +// rather than considering them equally good). #include_next -#if __cplusplus <= 201103L - // We need to define these overloads in exactly the namespace our standard // library uses (including the right inline namespace), otherwise they won't be // picked up by other functions in the standard library (e.g. functions in @@ -59,30 +67,43 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif #endif +#pragma push_macro("_CPP14_CONSTEXPR") +#if __cplusplus >= 201402L +#define _CPP14_CONSTEXPR constexpr +#else +#define _CPP14_CONSTEXPR +#endif + template -inline __device__ const __T & +__attribute__((enable_if(true, ""))) +inline _CPP14_CONSTEXPR __host__ __device__ const __T & max(const __T &__a, const __T &__b, __Cmp __cmp) { return __cmp(__a, __b) ? __b : __a; } template -inline __device__ const __T & +__attribute__((enable_if(true, ""))) +inline _CPP14_CONSTEXPR __host__ __device__ const __T & max(const __T &__a, const __T &__b) { return __a < __b ? __b : __a; } template -inline __device__ const __T & +__attribute__((enable_if(true, ""))) +inline _CPP14_CONSTEXPR __host__ __device__ const __T & min(const __T &__a, const __T &__b, __Cmp __cmp) { return __cmp(__b, __a) ? __b : __a; } template -inline __device__ const __T & +__attribute__((enable_if(true, ""))) +inline _CPP14_CONSTEXPR __host__ __device__ const __T & min(const __T &__a, const __T &__b) { return __a < __b ? 
__a : __b; } +#pragma pop_macro("_CPP14_CONSTEXPR") + #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD #else @@ -92,5 +113,4 @@ _GLIBCXX_END_NAMESPACE_VERSION } // namespace std #endif -#endif // __cplusplus <= 201103L #endif // __CLANG_CUDA_WRAPPERS_ALGORITHM diff --git a/lib/clang/7.0.0/include/cuda_wrappers/complex b/lib/clang/8.0.0/include/cuda_wrappers/complex similarity index 100% rename from lib/clang/7.0.0/include/cuda_wrappers/complex rename to lib/clang/8.0.0/include/cuda_wrappers/complex diff --git a/lib/clang/7.0.0/include/cuda_wrappers/new b/lib/clang/8.0.0/include/cuda_wrappers/new similarity index 100% rename from lib/clang/7.0.0/include/cuda_wrappers/new rename to lib/clang/8.0.0/include/cuda_wrappers/new diff --git a/lib/clang/7.0.0/include/emmintrin.h b/lib/clang/8.0.0/include/emmintrin.h similarity index 86% rename from lib/clang/7.0.0/include/emmintrin.h rename to lib/clang/8.0.0/include/emmintrin.h index 9302321..6d61f97 100644 --- a/lib/clang/7.0.0/include/emmintrin.h +++ b/lib/clang/8.0.0/include/emmintrin.h @@ -44,12 +44,11 @@ typedef unsigned char __v16qu __attribute__((__vector_size__(16))); * appear in the interface though. */ typedef signed char __v16qs __attribute__((__vector_size__(16))); -#include - /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64))) -/// \brief Adds lower double-precision values in both operands and returns the +/// Adds lower double-precision values in both operands and returns the /// sum in the lower 64 bits of the result. The upper 64 bits of the result /// are copied from the upper double-precision value of the first operand. /// @@ -71,7 +70,7 @@ _mm_add_sd(__m128d __a, __m128d __b) return __a; } -/// \brief Adds two 128-bit vectors of [2 x double]. +/// Adds two 128-bit vectors of [2 x double]. /// /// \headerfile /// @@ -89,7 +88,7 @@ _mm_add_pd(__m128d __a, __m128d __b) return (__m128d)((__v2df)__a + (__v2df)__b); } -/// \brief Subtracts the lower double-precision value of the second operand +/// Subtracts the lower double-precision value of the second operand /// from the lower double-precision value of the first operand and returns /// the difference in the lower 64 bits of the result. The upper 64 bits of /// the result are copied from the upper double-precision value of the first @@ -113,7 +112,7 @@ _mm_sub_sd(__m128d __a, __m128d __b) return __a; } -/// \brief Subtracts two 128-bit vectors of [2 x double]. +/// Subtracts two 128-bit vectors of [2 x double]. /// /// \headerfile /// @@ -131,7 +130,7 @@ _mm_sub_pd(__m128d __a, __m128d __b) return (__m128d)((__v2df)__a - (__v2df)__b); } -/// \brief Multiplies lower double-precision values in both operands and returns +/// Multiplies lower double-precision values in both operands and returns /// the product in the lower 64 bits of the result. The upper 64 bits of the /// result are copied from the upper double-precision value of the first /// operand. @@ -154,7 +153,7 @@ _mm_mul_sd(__m128d __a, __m128d __b) return __a; } -/// \brief Multiplies two 128-bit vectors of [2 x double]. +/// Multiplies two 128-bit vectors of [2 x double]. 
/// /// \headerfile /// @@ -172,7 +171,7 @@ _mm_mul_pd(__m128d __a, __m128d __b) return (__m128d)((__v2df)__a * (__v2df)__b); } -/// \brief Divides the lower double-precision value of the first operand by the +/// Divides the lower double-precision value of the first operand by the /// lower double-precision value of the second operand and returns the /// quotient in the lower 64 bits of the result. The upper 64 bits of the /// result are copied from the upper double-precision value of the first @@ -196,7 +195,7 @@ _mm_div_sd(__m128d __a, __m128d __b) return __a; } -/// \brief Performs an element-by-element division of two 128-bit vectors of +/// Performs an element-by-element division of two 128-bit vectors of /// [2 x double]. /// /// \headerfile @@ -215,7 +214,7 @@ _mm_div_pd(__m128d __a, __m128d __b) return (__m128d)((__v2df)__a / (__v2df)__b); } -/// \brief Calculates the square root of the lower double-precision value of +/// Calculates the square root of the lower double-precision value of /// the second operand and returns it in the lower 64 bits of the result. /// The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. @@ -238,10 +237,10 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); - return (__m128d) { __c[0], __a[1] }; + return __extension__ (__m128d) { __c[0], __a[1] }; } -/// \brief Calculates the square root of the each of two values stored in a +/// Calculates the square root of the each of two values stored in a /// 128-bit vector of [2 x double]. /// /// \headerfile @@ -258,7 +257,7 @@ _mm_sqrt_pd(__m128d __a) return __builtin_ia32_sqrtpd((__v2df)__a); } -/// \brief Compares lower 64-bit double-precision values of both operands, and +/// Compares lower 64-bit double-precision values of both operands, and /// returns the lesser of the pair of values in the lower 64-bits of the /// result. The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. @@ -282,7 +281,7 @@ _mm_min_sd(__m128d __a, __m128d __b) return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); } -/// \brief Performs element-by-element comparison of the two 128-bit vectors of +/// Performs element-by-element comparison of the two 128-bit vectors of /// [2 x double] and returns the vector containing the lesser of each pair of /// values. /// @@ -302,7 +301,7 @@ _mm_min_pd(__m128d __a, __m128d __b) return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); } -/// \brief Compares lower 64-bit double-precision values of both operands, and +/// Compares lower 64-bit double-precision values of both operands, and /// returns the greater of the pair of values in the lower 64-bits of the /// result. The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. @@ -326,7 +325,7 @@ _mm_max_sd(__m128d __a, __m128d __b) return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); } -/// \brief Performs element-by-element comparison of the two 128-bit vectors of +/// Performs element-by-element comparison of the two 128-bit vectors of /// [2 x double] and returns the vector containing the greater of each pair /// of values. /// @@ -346,7 +345,7 @@ _mm_max_pd(__m128d __a, __m128d __b) return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); } -/// \brief Performs a bitwise AND of two 128-bit vectors of [2 x double]. +/// Performs a bitwise AND of two 128-bit vectors of [2 x double]. 
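For reference, the packed-double arithmetic intrinsics in this hunk map directly onto per-lane scalar code. A small sketch, assuming any SSE2-capable x86 compiler (values are illustrative):

```cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128d a = _mm_set_pd(4.0, 9.0);   // element 0 = 9.0, element 1 = 4.0
  __m128d b = _mm_set_pd(1.0, 16.0);  // element 0 = 16.0, element 1 = 1.0

  __m128d sum  = _mm_add_pd(a, b);    // per-lane add:      {25.0, 5.0}
  __m128d prod = _mm_mul_pd(a, b);    // per-lane multiply: {144.0, 4.0}
  __m128d root = _mm_sqrt_pd(a);      // per-lane sqrt:     {3.0, 2.0}

  double out[2];
  _mm_storeu_pd(out, sum);
  std::printf("sum  = {%g, %g}\n", out[0], out[1]);  // {25, 5}
  _mm_storeu_pd(out, prod);
  std::printf("prod = {%g, %g}\n", out[0], out[1]);  // {144, 4}
  _mm_storeu_pd(out, root);
  std::printf("sqrt = {%g, %g}\n", out[0], out[1]);  // {3, 2}
}
```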
/// /// \headerfile /// @@ -364,7 +363,7 @@ _mm_and_pd(__m128d __a, __m128d __b) return (__m128d)((__v2du)__a & (__v2du)__b); } -/// \brief Performs a bitwise AND of two 128-bit vectors of [2 x double], using +/// Performs a bitwise AND of two 128-bit vectors of [2 x double], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile @@ -385,7 +384,7 @@ _mm_andnot_pd(__m128d __a, __m128d __b) return (__m128d)(~(__v2du)__a & (__v2du)__b); } -/// \brief Performs a bitwise OR of two 128-bit vectors of [2 x double]. +/// Performs a bitwise OR of two 128-bit vectors of [2 x double]. /// /// \headerfile /// @@ -403,7 +402,7 @@ _mm_or_pd(__m128d __a, __m128d __b) return (__m128d)((__v2du)__a | (__v2du)__b); } -/// \brief Performs a bitwise XOR of two 128-bit vectors of [2 x double]. +/// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. /// /// \headerfile /// @@ -421,7 +420,7 @@ _mm_xor_pd(__m128d __a, __m128d __b) return (__m128d)((__v2du)__a ^ (__v2du)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0 /// for false, 0xFFFFFFFFFFFFFFFF for true. /// @@ -440,7 +439,7 @@ _mm_cmpeq_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are less than those in the second operand. Each comparison /// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. @@ -460,7 +459,7 @@ _mm_cmplt_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are less than or equal to those in the second operand. /// @@ -481,7 +480,7 @@ _mm_cmple_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are greater than those in the second operand. /// @@ -502,7 +501,7 @@ _mm_cmpgt_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are greater than or equal to those in the second operand. /// @@ -523,7 +522,7 @@ _mm_cmpge_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are ordered with respect to those in the second operand. 
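Since the packed comparisons return an all-zeros/all-ones mask per lane rather than a bool, they compose with the bitwise intrinsics above into a branchless per-lane select. A sketch under the same SSE2 assumptions (`select_lt` is an illustrative helper):

```cpp
#include <emmintrin.h>
#include <cstdio>

// Per-lane select: where a < b take x, elsewhere take y.
static __m128d select_lt(__m128d a, __m128d b, __m128d x, __m128d y) {
  __m128d mask = _mm_cmplt_pd(a, b);         // all-ones where a < b
  return _mm_or_pd(_mm_and_pd(mask, x),      // x in the "true" lanes
                   _mm_andnot_pd(mask, y));  // y in the "false" lanes
}

int main() {
  __m128d a = _mm_setr_pd(1.0, 8.0);  // setr lists element 0 first
  __m128d b = _mm_setr_pd(2.0, 3.0);
  __m128d r = select_lt(a, b, _mm_setr_pd(10.0, 20.0),
                              _mm_setr_pd(-10.0, -20.0));
  double out[2];
  _mm_storeu_pd(out, r);
  std::printf("{%g, %g}\n", out[0], out[1]);  // {10, -20}
}
```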
/// @@ -546,7 +545,7 @@ _mm_cmpord_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are unordered with respect to those in the second operand. /// @@ -570,7 +569,7 @@ _mm_cmpunord_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are unequal to those in the second operand. /// @@ -591,7 +590,7 @@ _mm_cmpneq_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not less than those in the second operand. /// @@ -612,7 +611,7 @@ _mm_cmpnlt_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not less than or equal to those in the second operand. /// @@ -633,7 +632,7 @@ _mm_cmpnle_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not greater than those in the second operand. /// @@ -654,7 +653,7 @@ _mm_cmpngt_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); } -/// \brief Compares each of the corresponding double-precision values of the +/// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not greater than or equal to those in the second operand. /// @@ -675,7 +674,7 @@ _mm_cmpnge_pd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. @@ -698,7 +697,7 @@ _mm_cmpeq_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. 
@@ -723,7 +722,7 @@ _mm_cmplt_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. @@ -748,7 +747,7 @@ _mm_cmple_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. @@ -771,10 +770,10 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); - return (__m128d) { __c[0], __a[1] }; + return __extension__ (__m128d) { __c[0], __a[1] }; } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. @@ -797,10 +796,10 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); - return (__m128d) { __c[0], __a[1] }; + return __extension__ (__m128d) { __c[0], __a[1] }; } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is "ordered" with respect to the /// corresponding value in the second parameter. @@ -827,7 +826,7 @@ _mm_cmpord_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is "unordered" with respect to the /// corresponding value in the second parameter. @@ -855,7 +854,7 @@ _mm_cmpunord_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. 
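Unlike the packed forms, the `_sd` comparisons write the mask only into the low lane and pass the high lane of the first operand through, which is what the `{ __c[0], __a[1] }` returns above implement. A sketch, assuming little-endian x86 (lane 0 occupies the lowest-addressed 8 bytes):

```cpp
#include <emmintrin.h>
#include <cstdio>
#include <cstring>

int main() {
  __m128d a = _mm_setr_pd(1.0, 42.0);
  __m128d b = _mm_setr_pd(2.0, -1.0);

  // Low lane: all-ones mask (1.0 < 2.0); high lane: 42.0 copied from a.
  __m128d r = _mm_cmplt_sd(a, b);

  unsigned long long lo;
  double hi;
  std::memcpy(&lo, &r, 8);   // raw mask bits of the low lane
  _mm_storeh_pd(&hi, r);     // high lane, passed through unchanged
  std::printf("lo = %#llx, hi = %g\n", lo, hi);  // 0xffffffffffffffff, 42
}
```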
@@ -880,7 +879,7 @@ _mm_cmpneq_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not less than the corresponding /// value in the second parameter. @@ -905,7 +904,7 @@ _mm_cmpnlt_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not less than or equal to the /// corresponding value in the second parameter. @@ -930,7 +929,7 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not greater than the corresponding /// value in the second parameter. @@ -953,10 +952,10 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); - return (__m128d) { __c[0], __a[1] }; + return __extension__ (__m128d) { __c[0], __a[1] }; } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not greater than or equal to the /// corresponding value in the second parameter. @@ -979,10 +978,10 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); - return (__m128d) { __c[0], __a[1] }; + return __extension__ (__m128d) { __c[0], __a[1] }; } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. /// /// The comparison yields 0 for false, 1 for true. If either of the two @@ -1006,7 +1005,7 @@ _mm_comieq_sd(__m128d __a, __m128d __b) return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. @@ -1032,7 +1031,7 @@ _mm_comilt_sd(__m128d __a, __m128d __b) return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. 
@@ -1058,7 +1057,7 @@ _mm_comile_sd(__m128d __a, __m128d __b) return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. @@ -1084,7 +1083,7 @@ _mm_comigt_sd(__m128d __a, __m128d __b) return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. @@ -1110,7 +1109,7 @@ _mm_comige_sd(__m128d __a, __m128d __b) return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. @@ -1136,7 +1135,7 @@ _mm_comineq_sd(__m128d __a, __m128d __b) return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. The /// comparison yields 0 for false, 1 for true. /// @@ -1160,7 +1159,7 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b) return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. @@ -1186,7 +1185,7 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b) return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. @@ -1212,7 +1211,7 @@ _mm_ucomile_sd(__m128d __a, __m128d __b) return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. 
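The `comi`/`ucomi` family instead returns a plain 0/1 `int`, so it can drive ordinary control flow directly; the difference between the two is that the `COMISD`-based forms signal an invalid-operation exception for QNaN operands while the `UCOMISD`-based forms stay quiet. A short sketch:

```cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128d a = _mm_set_sd(1.0);
  __m128d b = _mm_set_sd(2.0);

  // Returns 0 or 1 rather than a lane mask, so it branches directly.
  if (_mm_comilt_sd(a, b))
    std::puts("a < b");  // taken: 1.0 < 2.0

  std::printf("equal? %d\n", _mm_ucomieq_sd(a, b));  // 0
}
```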
@@ -1238,7 +1237,7 @@ _mm_ucomigt_sd(__m128d __a, __m128d __b) return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. @@ -1264,7 +1263,7 @@ _mm_ucomige_sd(__m128d __a, __m128d __b) return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each of +/// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. @@ -1290,7 +1289,7 @@ _mm_ucomineq_sd(__m128d __a, __m128d __b) return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); } -/// \brief Converts the two double-precision floating-point elements of a +/// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two single-precision floating-point /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. /// The upper 64 bits of the result vector are set to zero. @@ -1309,7 +1308,7 @@ _mm_cvtpd_ps(__m128d __a) return __builtin_ia32_cvtpd2ps((__v2df)__a); } -/// \brief Converts the lower two single-precision floating-point elements of a +/// Converts the lower two single-precision floating-point elements of a /// 128-bit vector of [4 x float] into two double-precision floating-point /// values, returned in a 128-bit vector of [2 x double]. The upper two /// elements of the input vector are unused. @@ -1330,7 +1329,7 @@ _mm_cvtps_pd(__m128 __a) __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); } -/// \brief Converts the lower two integer elements of a 128-bit vector of +/// Converts the lower two integer elements of a 128-bit vector of /// [4 x i32] into two double-precision floating-point values, returned in a /// 128-bit vector of [2 x double]. /// @@ -1353,7 +1352,7 @@ _mm_cvtepi32_pd(__m128i __a) __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); } -/// \brief Converts the two double-precision floating-point elements of a +/// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper /// 64 bits of the result vector are set to zero. @@ -1372,7 +1371,7 @@ _mm_cvtpd_epi32(__m128d __a) return __builtin_ia32_cvtpd2dq((__v2df)__a); } -/// \brief Converts the low-order element of a 128-bit vector of [2 x double] +/// Converts the low-order element of a 128-bit vector of [2 x double] /// into a 32-bit signed integer value. /// /// \headerfile @@ -1389,7 +1388,7 @@ _mm_cvtsd_si32(__m128d __a) return __builtin_ia32_cvtsd2si((__v2df)__a); } -/// \brief Converts the lower double-precision floating-point element of a +/// Converts the lower double-precision floating-point element of a /// 128-bit vector of [2 x double], in the second parameter, into a /// single-precision floating-point value, returned in the lower 32 bits of a /// 128-bit vector of [4 x float]. 
The upper 96 bits of the result vector are @@ -1414,7 +1413,7 @@ _mm_cvtsd_ss(__m128 __a, __m128d __b) return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); } -/// \brief Converts a 32-bit signed integer value, in the second parameter, into +/// Converts a 32-bit signed integer value, in the second parameter, into /// a double-precision floating-point value, returned in the lower 64 bits of /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector /// are copied from the upper 64 bits of the first parameter. @@ -1438,7 +1437,7 @@ _mm_cvtsi32_sd(__m128d __a, int __b) return __a; } -/// \brief Converts the lower single-precision floating-point element of a +/// Converts the lower single-precision floating-point element of a /// 128-bit vector of [4 x float], in the second parameter, into a /// double-precision floating-point value, returned in the lower 64 bits of /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector @@ -1464,7 +1463,7 @@ _mm_cvtss_sd(__m128d __a, __m128 __b) return __a; } -/// \brief Converts the two double-precision floating-point elements of a +/// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. /// @@ -1487,7 +1486,7 @@ _mm_cvttpd_epi32(__m128d __a) return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); } -/// \brief Converts the low-order element of a [2 x double] vector into a 32-bit +/// Converts the low-order element of a [2 x double] vector into a 32-bit /// signed integer value, truncating the result when it is inexact. /// /// \headerfile @@ -1505,7 +1504,7 @@ _mm_cvttsd_si32(__m128d __a) return __builtin_ia32_cvttsd2si((__v2df)__a); } -/// \brief Converts the two double-precision floating-point elements of a +/// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in a 64-bit vector of [2 x i32]. /// @@ -1516,13 +1515,13 @@ _mm_cvttsd_si32(__m128d __a) /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 64-bit vector of [2 x i32] containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) { return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); } -/// \brief Converts the two double-precision floating-point elements of a +/// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in a 64-bit vector of [2 x i32]. /// @@ -1536,13 +1535,13 @@ _mm_cvtpd_pi32(__m128d __a) /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 64-bit vector of [2 x i32] containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) { return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); } -/// \brief Converts the two signed 32-bit integer elements of a 64-bit vector of +/// Converts the two signed 32-bit integer elements of a 64-bit vector of /// [2 x i32] into two double-precision floating-point values, returned in a /// 128-bit vector of [2 x double]. /// @@ -1553,13 +1552,13 @@ _mm_cvttpd_pi32(__m128d __a) /// \param __a /// A 64-bit vector of [2 x i32]. /// \returns A 128-bit vector of [2 x double] containing the converted values. 
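The conversion intrinsics differ mainly in rounding: `_mm_cvtsd_si32` rounds according to the current MXCSR rounding mode (round-to-nearest-even by default), while `_mm_cvttsd_si32` always truncates toward zero. A sketch:

```cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  // Widen the low two 32-bit ints to doubles.
  __m128i i = _mm_setr_epi32(3, -7, 0, 0);
  __m128d d = _mm_cvtepi32_pd(i);  // {3.0, -7.0}

  __m128d x = _mm_set_sd(2.5);
  std::printf("cvtsd  = %d\n", _mm_cvtsd_si32(x));   // 2 (ties to even)
  std::printf("cvttsd = %d\n", _mm_cvttsd_si32(x));  // 2 (truncated)

  x = _mm_set_sd(-2.7);
  std::printf("cvtsd  = %d\n", _mm_cvtsd_si32(x));   // -3 (nearest)
  std::printf("cvttsd = %d\n", _mm_cvttsd_si32(x));  // -2 (toward zero)

  double out[2];
  _mm_storeu_pd(out, d);
  std::printf("{%g, %g}\n", out[0], out[1]);  // {3, -7}
}
```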
-static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) { return __builtin_ia32_cvtpi2pd((__v2si)__a); } -/// \brief Returns the low-order element of a 128-bit vector of [2 x double] as +/// Returns the low-order element of a 128-bit vector of [2 x double] as /// a double-precision floating-point value. /// /// \headerfile @@ -1576,7 +1575,7 @@ _mm_cvtsd_f64(__m128d __a) return __a[0]; } -/// \brief Loads a 128-bit floating-point vector of [2 x double] from an aligned +/// Loads a 128-bit floating-point vector of [2 x double] from an aligned /// memory location. /// /// \headerfile @@ -1593,7 +1592,7 @@ _mm_load_pd(double const *__dp) return *(__m128d*)__dp; } -/// \brief Loads a double-precision floating-point value from a specified memory +/// Loads a double-precision floating-point value from a specified memory /// location and duplicates it to both vector elements of a 128-bit vector of /// [2 x double]. /// @@ -1612,12 +1611,12 @@ _mm_load1_pd(double const *__dp) double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((struct __mm_load1_pd_struct*)__dp)->__u; - return (__m128d){ __u, __u }; + return __extension__ (__m128d){ __u, __u }; } #define _mm_load_pd1(dp) _mm_load1_pd(dp) -/// \brief Loads two double-precision values, in reverse order, from an aligned +/// Loads two double-precision values, in reverse order, from an aligned /// memory location into a 128-bit vector of [2 x double]. /// /// \headerfile @@ -1638,7 +1637,7 @@ _mm_loadr_pd(double const *__dp) return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); } -/// \brief Loads a 128-bit floating-point vector of [2 x double] from an +/// Loads a 128-bit floating-point vector of [2 x double] from an /// unaligned memory location. /// /// \headerfile @@ -1658,7 +1657,7 @@ _mm_loadu_pd(double const *__dp) return ((struct __loadu_pd*)__dp)->__v; } -/// \brief Loads a 64-bit integer value to the low element of a 128-bit integer +/// Loads a 64-bit integer value to the low element of a 128-bit integer /// vector and clears the upper element. /// /// \headerfile @@ -1676,10 +1675,52 @@ _mm_loadu_si64(void const *__a) long long __v; } __attribute__((__packed__, __may_alias__)); long long __u = ((struct __loadu_si64*)__a)->__v; - return (__m128i){__u, 0L}; + return __extension__ (__m128i)(__v2di){__u, 0LL}; +} + +/// Loads a 32-bit integer value to the low element of a 128-bit integer +/// vector and clears the upper element. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VMOVD / MOVD instruction. +/// +/// \param __a +/// A pointer to a 32-bit memory location. The address of the memory +/// location does not have to be aligned. +/// \returns A 128-bit vector of [4 x i32] containing the loaded value. +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_loadu_si32(void const *__a) +{ + struct __loadu_si32 { + int __v; + } __attribute__((__packed__, __may_alias__)); + int __u = ((struct __loadu_si32*)__a)->__v; + return __extension__ (__m128i)(__v4si){__u, 0, 0, 0}; +} + +/// Loads a 16-bit integer value to the low element of a 128-bit integer +/// vector and clears the upper element. +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a specific instruction. +/// +/// \param __a +/// A pointer to a 16-bit memory location. The address of the memory +/// location does not have to be aligned. +/// \returns A 128-bit vector of [8 x i16] containing the loaded value. 
+static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_loadu_si16(void const *__a) +{ + struct __loadu_si16 { + short __v; + } __attribute__((__packed__, __may_alias__)); + short __u = ((struct __loadu_si16*)__a)->__v; + return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; } -/// \brief Loads a 64-bit double-precision value to the low element of a +/// Loads a 64-bit double-precision value to the low element of a /// 128-bit integer vector and clears the upper element. /// /// \headerfile @@ -1697,10 +1738,10 @@ _mm_load_sd(double const *__dp) double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((struct __mm_load_sd_struct*)__dp)->__u; - return (__m128d){ __u, 0 }; + return __extension__ (__m128d){ __u, 0 }; } -/// \brief Loads a double-precision value into the high-order bits of a 128-bit +/// Loads a double-precision value into the high-order bits of a 128-bit /// vector of [2 x double]. The low-order bits are copied from the low-order /// bits of the first operand. /// @@ -1724,10 +1765,10 @@ _mm_loadh_pd(__m128d __a, double const *__dp) double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u; - return (__m128d){ __a[0], __u }; + return __extension__ (__m128d){ __a[0], __u }; } -/// \brief Loads a double-precision value into the low-order bits of a 128-bit +/// Loads a double-precision value into the low-order bits of a 128-bit /// vector of [2 x double]. The high-order bits are copied from the /// high-order bits of the first operand. /// @@ -1751,10 +1792,10 @@ _mm_loadl_pd(__m128d __a, double const *__dp) double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u; - return (__m128d){ __u, __a[1] }; + return __extension__ (__m128d){ __u, __a[1] }; } -/// \brief Constructs a 128-bit floating-point vector of [2 x double] with +/// Constructs a 128-bit floating-point vector of [2 x double] with /// unspecified content. This could be used as an argument to another /// intrinsic function where the argument is required but the value is not /// actually used. @@ -1771,7 +1812,7 @@ _mm_undefined_pd(void) return (__m128d)__builtin_ia32_undef128(); } -/// \brief Constructs a 128-bit floating-point vector of [2 x double]. The lower +/// Constructs a 128-bit floating-point vector of [2 x double]. The lower /// 64 bits of the vector are initialized with the specified double-precision /// floating-point value. The upper 64 bits are set to zero. /// @@ -1788,10 +1829,10 @@ _mm_undefined_pd(void) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { - return (__m128d){ __w, 0 }; + return __extension__ (__m128d){ __w, 0 }; } -/// \brief Constructs a 128-bit floating-point vector of [2 x double], with each +/// Constructs a 128-bit floating-point vector of [2 x double], with each /// of the two double-precision floating-point vector elements set to the /// specified double-precision floating-point value. /// @@ -1806,10 +1847,10 @@ _mm_set_sd(double __w) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { - return (__m128d){ __w, __w }; + return __extension__ (__m128d){ __w, __w }; } -/// \brief Constructs a 128-bit floating-point vector of [2 x double], with each +/// Constructs a 128-bit floating-point vector of [2 x double], with each /// of the two double-precision floating-point vector elements set to the /// specified double-precision floating-point value. 
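The two loads added in this hunk (`_mm_loadu_si32`, `_mm_loadu_si16`) make it easy to pull narrow, unaligned fields into a vector without a memcpy dance. A sketch, assuming a compiler whose emmintrin.h ships them (clang 8 per this diff; buffer contents are illustrative):

```cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  // A packed byte stream; none of the embedded fields are aligned.
  unsigned char buf[] = {0xAA, 0x78, 0x56, 0x34, 0x12, 0xCD, 0xAB};

  __m128i v32 = _mm_loadu_si32(buf + 1);  // low lane = 0x12345678, rest 0
  __m128i v16 = _mm_loadu_si16(buf + 5);  // low lane = 0xABCD, rest 0

  std::printf("%#x %#x\n", _mm_cvtsi128_si32(v32),
              _mm_extract_epi16(v16, 0) & 0xFFFF);  // 0x12345678 0xabcd
}
```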
/// @@ -1827,7 +1868,7 @@ _mm_set_pd1(double __w) return _mm_set1_pd(__w); } -/// \brief Constructs a 128-bit floating-point vector of [2 x double] +/// Constructs a 128-bit floating-point vector of [2 x double] /// initialized with the specified double-precision floating-point values. /// /// \headerfile @@ -1844,10 +1885,10 @@ _mm_set_pd1(double __w) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x) { - return (__m128d){ __x, __w }; + return __extension__ (__m128d){ __x, __w }; } -/// \brief Constructs a 128-bit floating-point vector of [2 x double], +/// Constructs a 128-bit floating-point vector of [2 x double], /// initialized in reverse order with the specified double-precision /// floating-point values. /// @@ -1865,10 +1906,10 @@ _mm_set_pd(double __w, double __x) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x) { - return (__m128d){ __w, __x }; + return __extension__ (__m128d){ __w, __x }; } -/// \brief Constructs a 128-bit floating-point vector of [2 x double] +/// Constructs a 128-bit floating-point vector of [2 x double] /// initialized to zero. /// /// \headerfile @@ -1880,10 +1921,10 @@ _mm_setr_pd(double __w, double __x) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { - return (__m128d){ 0, 0 }; + return __extension__ (__m128d){ 0, 0 }; } -/// \brief Constructs a 128-bit floating-point vector of [2 x double]. The lower +/// Constructs a 128-bit floating-point vector of [2 x double]. The lower /// 64 bits are set to the lower 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. /// @@ -1901,10 +1942,11 @@ _mm_setzero_pd(void) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b) { - return (__m128d){ __b[0], __a[1] }; + __a[0] = __b[0]; + return __a; } -/// \brief Stores the lower 64 bits of a 128-bit vector of [2 x double] to a +/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile @@ -1924,7 +1966,7 @@ _mm_store_sd(double *__dp, __m128d __a) ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; } -/// \brief Moves packed double-precision values from a 128-bit vector of +/// Moves packed double-precision values from a 128-bit vector of /// [2 x double] to a memory location. /// /// \headerfile @@ -1943,7 +1985,7 @@ _mm_store_pd(double *__dp, __m128d __a) *(__m128d*)__dp = __a; } -/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to +/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to /// the upper and lower 64 bits of a memory location. /// /// \headerfile @@ -1964,7 +2006,7 @@ _mm_store1_pd(double *__dp, __m128d __a) _mm_store_pd(__dp, __a); } -/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to +/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to /// the upper and lower 64 bits of a memory location. /// /// \headerfile @@ -1981,10 +2023,10 @@ _mm_store1_pd(double *__dp, __m128d __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a) { - return _mm_store1_pd(__dp, __a); + _mm_store1_pd(__dp, __a); } -/// \brief Stores a 128-bit vector of [2 x double] into an unaligned memory +/// Stores a 128-bit vector of [2 x double] into an unaligned memory /// location. 
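One recurring pitfall with the set intrinsics: `_mm_set_pd` lists elements high-to-low while `_mm_setr_pd` lists them low-to-high, and `_mm_move_sd` splices the low lane of its second operand under the high lane of its first. A sketch:

```cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  double out[2];

  _mm_storeu_pd(out, _mm_set_pd(1.0, 2.0));   // high-to-low: out = {2, 1}
  std::printf("set_pd:  {%g, %g}\n", out[0], out[1]);

  _mm_storeu_pd(out, _mm_setr_pd(1.0, 2.0));  // low-to-high: out = {1, 2}
  std::printf("setr_pd: {%g, %g}\n", out[0], out[1]);

  __m128d a = _mm_setr_pd(10.0, 11.0), b = _mm_setr_pd(20.0, 21.0);
  _mm_storeu_pd(out, _mm_move_sd(a, b));      // {b[0], a[1]} = {20, 11}
  std::printf("move_sd: {%g, %g}\n", out[0], out[1]);
}
```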
/// /// \headerfile @@ -2005,7 +2047,7 @@ _mm_storeu_pd(double *__dp, __m128d __a) ((struct __storeu_pd*)__dp)->__v = __a; } -/// \brief Stores two double-precision values, in reverse order, from a 128-bit +/// Stores two double-precision values, in reverse order, from a 128-bit /// vector of [2 x double] to a 16-byte aligned memory location. /// /// \headerfile @@ -2026,7 +2068,7 @@ _mm_storer_pd(double *__dp, __m128d __a) *(__m128d *)__dp = __a; } -/// \brief Stores the upper 64 bits of a 128-bit vector of [2 x double] to a +/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile @@ -2046,7 +2088,7 @@ _mm_storeh_pd(double *__dp, __m128d __a) ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1]; } -/// \brief Stores the lower 64 bits of a 128-bit vector of [2 x double] to a +/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile @@ -2066,7 +2108,7 @@ _mm_storel_pd(double *__dp, __m128d __a) ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0]; } -/// \brief Adds the corresponding elements of two 128-bit vectors of [16 x i8], +/// Adds the corresponding elements of two 128-bit vectors of [16 x i8], /// saving the lower 8 bits of each sum in the corresponding element of a /// 128-bit result vector of [16 x i8]. /// @@ -2088,7 +2130,7 @@ _mm_add_epi8(__m128i __a, __m128i __b) return (__m128i)((__v16qu)__a + (__v16qu)__b); } -/// \brief Adds the corresponding elements of two 128-bit vectors of [8 x i16], +/// Adds the corresponding elements of two 128-bit vectors of [8 x i16], /// saving the lower 16 bits of each sum in the corresponding element of a /// 128-bit result vector of [8 x i16]. /// @@ -2110,7 +2152,7 @@ _mm_add_epi16(__m128i __a, __m128i __b) return (__m128i)((__v8hu)__a + (__v8hu)__b); } -/// \brief Adds the corresponding elements of two 128-bit vectors of [4 x i32], +/// Adds the corresponding elements of two 128-bit vectors of [4 x i32], /// saving the lower 32 bits of each sum in the corresponding element of a /// 128-bit result vector of [4 x i32]. /// @@ -2132,7 +2174,7 @@ _mm_add_epi32(__m128i __a, __m128i __b) return (__m128i)((__v4su)__a + (__v4su)__b); } -/// \brief Adds two signed or unsigned 64-bit integer values, returning the +/// Adds two signed or unsigned 64-bit integer values, returning the /// lower 64 bits of the sum. /// /// \headerfile @@ -2144,13 +2186,13 @@ _mm_add_epi32(__m128i __a, __m128i __b) /// \param __b /// A 64-bit integer. /// \returns A 64-bit integer containing the sum of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); } -/// \brief Adds the corresponding elements of two 128-bit vectors of [2 x i64], +/// Adds the corresponding elements of two 128-bit vectors of [2 x i64], /// saving the lower 64 bits of each sum in the corresponding element of a /// 128-bit result vector of [2 x i64]. /// @@ -2172,7 +2214,7 @@ _mm_add_epi64(__m128i __a, __m128i __b) return (__m128i)((__v2du)__a + (__v2du)__b); } -/// \brief Adds, with saturation, the corresponding elements of two 128-bit +/// Adds, with saturation, the corresponding elements of two 128-bit /// signed [16 x i8] vectors, saving each sum in the corresponding element of /// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are /// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80. 
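The difference between the wrapping and saturating adds is easiest to see at the byte level. A sketch:

```cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128i a = _mm_set1_epi8(100);
  __m128i b = _mm_set1_epi8(50);

  // The plain add wraps modulo 256; the saturating form clamps at 0x7F.
  __m128i wrap = _mm_add_epi8(a, b);   // 150 wraps to -106 as signed char
  __m128i sat  = _mm_adds_epi8(a, b);  // clamped to 127

  signed char w[16], s[16];
  _mm_storeu_si128((__m128i *)w, wrap);
  _mm_storeu_si128((__m128i *)s, sat);
  std::printf("wrap = %d, saturated = %d\n", w[0], s[0]);  // -106, 127
}
```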
@@ -2193,7 +2235,7 @@ _mm_adds_epi8(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); } -/// \brief Adds, with saturation, the corresponding elements of two 128-bit +/// Adds, with saturation, the corresponding elements of two 128-bit /// signed [8 x i16] vectors, saving each sum in the corresponding element of /// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF /// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to @@ -2215,7 +2257,7 @@ _mm_adds_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Adds, with saturation, the corresponding elements of two 128-bit +/// Adds, with saturation, the corresponding elements of two 128-bit /// unsigned [16 x i8] vectors, saving each sum in the corresponding element /// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF /// are saturated to 0xFF. Negative sums are saturated to 0x00. @@ -2236,7 +2278,7 @@ _mm_adds_epu8(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); } -/// \brief Adds, with saturation, the corresponding elements of two 128-bit +/// Adds, with saturation, the corresponding elements of two 128-bit /// unsigned [8 x i16] vectors, saving each sum in the corresponding element /// of a 128-bit result vector of [8 x i16]. Positive sums greater than /// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000. @@ -2257,7 +2299,7 @@ _mm_adds_epu16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Computes the rounded avarages of corresponding elements of two +/// Computes the rounded avarages of corresponding elements of two /// 128-bit unsigned [16 x i8] vectors, saving each result in the /// corresponding element of a 128-bit result vector of [16 x i8]. /// @@ -2281,7 +2323,7 @@ _mm_avg_epu8(__m128i __a, __m128i __b) >> 1, __v16qu); } -/// \brief Computes the rounded avarages of corresponding elements of two +/// Computes the rounded avarages of corresponding elements of two /// 128-bit unsigned [8 x i16] vectors, saving each result in the /// corresponding element of a 128-bit result vector of [8 x i16]. /// @@ -2305,7 +2347,7 @@ _mm_avg_epu16(__m128i __a, __m128i __b) >> 1, __v8hu); } -/// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16] +/// Multiplies the corresponding elements of two 128-bit signed [8 x i16] /// vectors, producing eight intermediate 32-bit signed integer products, and /// adds the consecutive pairs of 32-bit products to form a 128-bit signed /// [4 x i32] vector. @@ -2331,7 +2373,7 @@ _mm_madd_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); } -/// \brief Compares corresponding elements of two 128-bit signed [8 x i16] +/// Compares corresponding elements of two 128-bit signed [8 x i16] /// vectors, saving the greater value from each comparison in the /// corresponding element of a 128-bit result vector of [8 x i16]. /// @@ -2351,7 +2393,7 @@ _mm_max_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Compares corresponding elements of two 128-bit unsigned [16 x i8] +/// Compares corresponding elements of two 128-bit unsigned [16 x i8] /// vectors, saving the greater value from each comparison in the /// corresponding element of a 128-bit result vector of [16 x i8]. 
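`_mm_madd_epi16` is the workhorse for 16-bit fixed-point dot products: it multiplies lane pairs and adds adjacent 32-bit products in a single step. A sketch:

```cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
  __m128i b = _mm_setr_epi16(1, 1, 1, 1, 2, 2, 2, 2);

  // Products 1,2,3,4,10,12,14,16 summed in adjacent pairs.
  __m128i p = _mm_madd_epi16(a, b);  // {3, 7, 22, 30}

  int out[4];
  _mm_storeu_si128((__m128i *)out, p);
  std::printf("{%d, %d, %d, %d}\n", out[0], out[1], out[2], out[3]);
}
```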
/// @@ -2371,7 +2413,7 @@ _mm_max_epu8(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); } -/// \brief Compares corresponding elements of two 128-bit signed [8 x i16] +/// Compares corresponding elements of two 128-bit signed [8 x i16] /// vectors, saving the smaller value from each comparison in the /// corresponding element of a 128-bit result vector of [8 x i16]. /// @@ -2391,7 +2433,7 @@ _mm_min_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Compares corresponding elements of two 128-bit unsigned [16 x i8] +/// Compares corresponding elements of two 128-bit unsigned [16 x i8] /// vectors, saving the smaller value from each comparison in the /// corresponding element of a 128-bit result vector of [16 x i8]. /// @@ -2411,7 +2453,7 @@ _mm_min_epu8(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); } -/// \brief Multiplies the corresponding elements of two signed [8 x i16] +/// Multiplies the corresponding elements of two signed [8 x i16] /// vectors, saving the upper 16 bits of each 32-bit product in the /// corresponding element of a 128-bit signed [8 x i16] result vector. /// @@ -2431,7 +2473,7 @@ _mm_mulhi_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Multiplies the corresponding elements of two unsigned [8 x i16] +/// Multiplies the corresponding elements of two unsigned [8 x i16] /// vectors, saving the upper 16 bits of each 32-bit product in the /// corresponding element of a 128-bit unsigned [8 x i16] result vector. /// @@ -2451,7 +2493,7 @@ _mm_mulhi_epu16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Multiplies the corresponding elements of two signed [8 x i16] +/// Multiplies the corresponding elements of two signed [8 x i16] /// vectors, saving the lower 16 bits of each 32-bit product in the /// corresponding element of a 128-bit signed [8 x i16] result vector. /// @@ -2471,7 +2513,7 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) return (__m128i)((__v8hu)__a * (__v8hu)__b); } -/// \brief Multiplies 32-bit unsigned integer values contained in the lower bits +/// Multiplies 32-bit unsigned integer values contained in the lower bits /// of the two 64-bit integer vectors and returns the 64-bit unsigned /// product. /// @@ -2484,13 +2526,13 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) /// \param __b /// A 64-bit integer containing one of the source operands. /// \returns A 64-bit integer vector containing the product of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, __m64 __b) { return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); } -/// \brief Multiplies 32-bit unsigned integer values contained in the lower +/// Multiplies 32-bit unsigned integer values contained in the lower /// bits of the corresponding elements of two [2 x i64] vectors, and returns /// the 64-bit products in the corresponding elements of a [2 x i64] vector. /// @@ -2509,7 +2551,7 @@ _mm_mul_epu32(__m128i __a, __m128i __b) return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); } -/// \brief Computes the absolute differences of corresponding 8-bit integer +/// Computes the absolute differences of corresponding 8-bit integer /// values in two 128-bit vectors. 
Sums the first 8 absolute differences, and /// separately sums the second 8 absolute differences. Packs these two /// unsigned 16-bit integer sums into the upper and lower elements of a @@ -2531,7 +2573,7 @@ _mm_sad_epu8(__m128i __a, __m128i __b) return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); } -/// \brief Subtracts the corresponding 8-bit integer values in the operands. +/// Subtracts the corresponding 8-bit integer values in the operands. /// /// \headerfile /// @@ -2549,7 +2591,7 @@ _mm_sub_epi8(__m128i __a, __m128i __b) return (__m128i)((__v16qu)__a - (__v16qu)__b); } -/// \brief Subtracts the corresponding 16-bit integer values in the operands. +/// Subtracts the corresponding 16-bit integer values in the operands. /// /// \headerfile /// @@ -2567,7 +2609,7 @@ _mm_sub_epi16(__m128i __a, __m128i __b) return (__m128i)((__v8hu)__a - (__v8hu)__b); } -/// \brief Subtracts the corresponding 32-bit integer values in the operands. +/// Subtracts the corresponding 32-bit integer values in the operands. /// /// \headerfile /// @@ -2585,7 +2627,7 @@ _mm_sub_epi32(__m128i __a, __m128i __b) return (__m128i)((__v4su)__a - (__v4su)__b); } -/// \brief Subtracts signed or unsigned 64-bit integer values and writes the +/// Subtracts signed or unsigned 64-bit integer values and writes the /// difference to the corresponding bits in the destination. /// /// \headerfile @@ -2598,13 +2640,13 @@ _mm_sub_epi32(__m128i __a, __m128i __b) /// A 64-bit integer vector containing the subtrahend. /// \returns A 64-bit integer vector containing the difference of the values in /// the operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); } -/// \brief Subtracts the corresponding elements of two [2 x i64] vectors. +/// Subtracts the corresponding elements of two [2 x i64] vectors. /// /// \headerfile /// @@ -2622,7 +2664,7 @@ _mm_sub_epi64(__m128i __a, __m128i __b) return (__m128i)((__v2du)__a - (__v2du)__b); } -/// \brief Subtracts corresponding 8-bit signed integer values in the input and +/// Subtracts corresponding 8-bit signed integer values in the input and /// returns the differences in the corresponding bytes in the destination. /// Differences greater than 0x7F are saturated to 0x7F, and differences less /// than 0x80 are saturated to 0x80. @@ -2643,7 +2685,7 @@ _mm_subs_epi8(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); } -/// \brief Subtracts corresponding 16-bit signed integer values in the input and +/// Subtracts corresponding 16-bit signed integer values in the input and /// returns the differences in the corresponding bytes in the destination. /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less /// than 0x8000 are saturated to 0x8000. @@ -2664,7 +2706,7 @@ _mm_subs_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Subtracts corresponding 8-bit unsigned integer values in the input +/// Subtracts corresponding 8-bit unsigned integer values in the input /// and returns the differences in the corresponding bytes in the /// destination. Differences less than 0x00 are saturated to 0x00. 
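A classic use of `_mm_sad_epu8` beyond motion estimation: summing all 16 bytes of a vector by taking absolute differences against zero, since `|x - 0| = x` for unsigned bytes. A sketch (`byte_sum` is an illustrative helper):

```cpp
#include <emmintrin.h>
#include <cstdio>

static int byte_sum(__m128i v) {
  // Each 64-bit half now holds the sum of its 8 bytes in the low 16 bits.
  __m128i sums = _mm_sad_epu8(v, _mm_setzero_si128());
  return _mm_cvtsi128_si32(sums)      // low half's partial sum
       + _mm_extract_epi16(sums, 4);  // high half's partial sum
}

int main() {
  __m128i v = _mm_set1_epi8(3);
  std::printf("%d\n", byte_sum(v));  // 16 * 3 = 48
}
```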
/// @@ -2684,7 +2726,7 @@ _mm_subs_epu8(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); } -/// \brief Subtracts corresponding 16-bit unsigned integer values in the input +/// Subtracts corresponding 16-bit unsigned integer values in the input /// and returns the differences in the corresponding bytes in the /// destination. Differences less than 0x0000 are saturated to 0x0000. /// @@ -2704,7 +2746,7 @@ _mm_subs_epu16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Performs a bitwise AND of two 128-bit integer vectors. +/// Performs a bitwise AND of two 128-bit integer vectors. /// /// \headerfile /// @@ -2722,7 +2764,7 @@ _mm_and_si128(__m128i __a, __m128i __b) return (__m128i)((__v2du)__a & (__v2du)__b); } -/// \brief Performs a bitwise AND of two 128-bit integer vectors, using the +/// Performs a bitwise AND of two 128-bit integer vectors, using the /// one's complement of the values contained in the first source operand. /// /// \headerfile @@ -2741,7 +2783,7 @@ _mm_andnot_si128(__m128i __a, __m128i __b) { return (__m128i)(~(__v2du)__a & (__v2du)__b); } -/// \brief Performs a bitwise OR of two 128-bit integer vectors. +/// Performs a bitwise OR of two 128-bit integer vectors. /// /// \headerfile /// @@ -2759,7 +2801,7 @@ _mm_or_si128(__m128i __a, __m128i __b) return (__m128i)((__v2du)__a | (__v2du)__b); } -/// \brief Performs a bitwise exclusive OR of two 128-bit integer vectors. +/// Performs a bitwise exclusive OR of two 128-bit integer vectors. /// /// \headerfile /// @@ -2777,7 +2819,7 @@ _mm_xor_si128(__m128i __a, __m128i __b) return (__m128i)((__v2du)__a ^ (__v2du)__b); } -/// \brief Left-shifts the 128-bit integer vector operand by the specified +/// Left-shifts the 128-bit integer vector operand by the specified /// number of bytes. Low-order bits are cleared. /// /// \headerfile @@ -2794,31 +2836,13 @@ _mm_xor_si128(__m128i __a, __m128i __b) /// An immediate value specifying the number of bytes to left-shift operand /// \a a. /// \returns A 128-bit integer vector containing the left-shifted value. -#define _mm_slli_si128(a, imm) __extension__ ({ \ - (__m128i)__builtin_shufflevector( \ - (__v16qi)_mm_setzero_si128(), \ - (__v16qi)(__m128i)(a), \ - ((char)(imm)&0xF0) ? 0 : 16 - (char)(imm), \ - ((char)(imm)&0xF0) ? 1 : 17 - (char)(imm), \ - ((char)(imm)&0xF0) ? 2 : 18 - (char)(imm), \ - ((char)(imm)&0xF0) ? 3 : 19 - (char)(imm), \ - ((char)(imm)&0xF0) ? 4 : 20 - (char)(imm), \ - ((char)(imm)&0xF0) ? 5 : 21 - (char)(imm), \ - ((char)(imm)&0xF0) ? 6 : 22 - (char)(imm), \ - ((char)(imm)&0xF0) ? 7 : 23 - (char)(imm), \ - ((char)(imm)&0xF0) ? 8 : 24 - (char)(imm), \ - ((char)(imm)&0xF0) ? 9 : 25 - (char)(imm), \ - ((char)(imm)&0xF0) ? 10 : 26 - (char)(imm), \ - ((char)(imm)&0xF0) ? 11 : 27 - (char)(imm), \ - ((char)(imm)&0xF0) ? 12 : 28 - (char)(imm), \ - ((char)(imm)&0xF0) ? 13 : 29 - (char)(imm), \ - ((char)(imm)&0xF0) ? 14 : 30 - (char)(imm), \ - ((char)(imm)&0xF0) ? 15 : 31 - (char)(imm)); }) +#define _mm_slli_si128(a, imm) \ + (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)) #define _mm_bslli_si128(a, imm) \ - _mm_slli_si128((a), (imm)) + (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)) -/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand +/// Left-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. 
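Note that the whole-vector byte shift takes an instruction immediate: after this change `_mm_slli_si128` lowers to `__builtin_ia32_pslldqi128_byteshift` instead of a 16-way shufflevector, but the count must still be an integer constant expression (the matching right-shift form appears later in this diff). A sketch:

```cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128i v = _mm_setr_epi32(0x11111111, 0x22222222,
                             0x33333333, 0x44444444);

  // Byte counts are encoded in the instruction, so they must be constants.
  __m128i l = _mm_slli_si128(v, 4);   // shift in 4 zero bytes at the bottom
  __m128i r = _mm_srli_si128(v, 12);  // keep only the top 4 bytes

  int lo[4], ro[4];
  _mm_storeu_si128((__m128i *)lo, l);
  _mm_storeu_si128((__m128i *)ro, r);
  std::printf("%#x %#x\n", lo[1], ro[0]);  // 0x11111111 0x44444444
}
```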
/// /// \headerfile @@ -2837,7 +2861,7 @@ _mm_slli_epi16(__m128i __a, int __count) return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); } -/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand +/// Left-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile @@ -2856,7 +2880,7 @@ _mm_sll_epi16(__m128i __a, __m128i __count) return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); } -/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand +/// Left-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile @@ -2875,7 +2899,7 @@ _mm_slli_epi32(__m128i __a, int __count) return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); } -/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand +/// Left-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile @@ -2894,7 +2918,7 @@ _mm_sll_epi32(__m128i __a, __m128i __count) return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); } -/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand +/// Left-shifts each 64-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile @@ -2913,7 +2937,7 @@ _mm_slli_epi64(__m128i __a, int __count) return __builtin_ia32_psllqi128((__v2di)__a, __count); } -/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand +/// Left-shifts each 64-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile @@ -2932,7 +2956,7 @@ _mm_sll_epi64(__m128i __a, __m128i __count) return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); } -/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand +/// Right-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// @@ -2952,7 +2976,7 @@ _mm_srai_epi16(__m128i __a, int __count) return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); } -/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand +/// Right-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// @@ -2972,7 +2996,7 @@ _mm_sra_epi16(__m128i __a, __m128i __count) return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); } -/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand +/// Right-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// @@ -2992,7 +3016,7 @@ _mm_srai_epi32(__m128i __a, int __count) return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); } -/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand +/// Right-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. 
/// @@ -3012,7 +3036,7 @@ _mm_sra_epi32(__m128i __a, __m128i __count) return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); } -/// \brief Right-shifts the 128-bit integer vector operand by the specified +/// Right-shifts the 128-bit integer vector operand by the specified /// number of bytes. High-order bits are cleared. /// /// \headerfile @@ -3029,31 +3053,13 @@ _mm_sra_epi32(__m128i __a, __m128i __count) /// An immediate value specifying the number of bytes to right-shift operand /// \a a. /// \returns A 128-bit integer vector containing the right-shifted value. -#define _mm_srli_si128(a, imm) __extension__ ({ \ - (__m128i)__builtin_shufflevector( \ - (__v16qi)(__m128i)(a), \ - (__v16qi)_mm_setzero_si128(), \ - ((char)(imm)&0xF0) ? 16 : (char)(imm) + 0, \ - ((char)(imm)&0xF0) ? 17 : (char)(imm) + 1, \ - ((char)(imm)&0xF0) ? 18 : (char)(imm) + 2, \ - ((char)(imm)&0xF0) ? 19 : (char)(imm) + 3, \ - ((char)(imm)&0xF0) ? 20 : (char)(imm) + 4, \ - ((char)(imm)&0xF0) ? 21 : (char)(imm) + 5, \ - ((char)(imm)&0xF0) ? 22 : (char)(imm) + 6, \ - ((char)(imm)&0xF0) ? 23 : (char)(imm) + 7, \ - ((char)(imm)&0xF0) ? 24 : (char)(imm) + 8, \ - ((char)(imm)&0xF0) ? 25 : (char)(imm) + 9, \ - ((char)(imm)&0xF0) ? 26 : (char)(imm) + 10, \ - ((char)(imm)&0xF0) ? 27 : (char)(imm) + 11, \ - ((char)(imm)&0xF0) ? 28 : (char)(imm) + 12, \ - ((char)(imm)&0xF0) ? 29 : (char)(imm) + 13, \ - ((char)(imm)&0xF0) ? 30 : (char)(imm) + 14, \ - ((char)(imm)&0xF0) ? 31 : (char)(imm) + 15); }) +#define _mm_srli_si128(a, imm) \ + (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)) #define _mm_bsrli_si128(a, imm) \ - _mm_srli_si128((a), (imm)) + (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)) -/// \brief Right-shifts each of 16-bit values in the 128-bit integer vector +/// Right-shifts each of 16-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile @@ -3072,7 +3078,7 @@ _mm_srli_epi16(__m128i __a, int __count) return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); } -/// \brief Right-shifts each of 16-bit values in the 128-bit integer vector +/// Right-shifts each of 16-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile @@ -3091,7 +3097,7 @@ _mm_srl_epi16(__m128i __a, __m128i __count) return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); } -/// \brief Right-shifts each of 32-bit values in the 128-bit integer vector +/// Right-shifts each of 32-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile @@ -3110,7 +3116,7 @@ _mm_srli_epi32(__m128i __a, int __count) return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); } -/// \brief Right-shifts each of 32-bit values in the 128-bit integer vector +/// Right-shifts each of 32-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile @@ -3129,7 +3135,7 @@ _mm_srl_epi32(__m128i __a, __m128i __count) return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); } -/// \brief Right-shifts each of 64-bit values in the 128-bit integer vector +/// Right-shifts each of 64-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. 
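`_mm_srli_si128` above gets the same byte-shift builtin treatment as its left-shifting twin. For the per-lane bit shifts, the logical (`srli`) and arithmetic (`srai`) variants documented above differ only in how the vacated high bits are filled, which a small sketch makes concrete (the harness is illustrative):

```c
#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  __m128i v = _mm_set1_epi16((short)0x8000);    /* sign bit set in every lane */
  __m128i logical    = _mm_srli_epi16(v, 4);    /* high bits cleared -> 0x0800 */
  __m128i arithmetic = _mm_srai_epi16(v, 4);    /* sign-filled       -> 0xf800 */
  printf("%04hx %04hx\n",
         (unsigned short)_mm_extract_epi16(logical, 0),
         (unsigned short)_mm_extract_epi16(arithmetic, 0));
  return 0;
}
```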
 ///
 /// \headerfile
@@ -3148,7 +3154,7 @@ _mm_srli_epi64(__m128i __a, int __count)
   return __builtin_ia32_psrlqi128((__v2di)__a, __count);
 }
 
-/// \brief Right-shifts each of 64-bit values in the 128-bit integer vector
+/// Right-shifts each of 64-bit values in the 128-bit integer vector
 ///    operand by the specified number of bits. High-order bits are cleared.
 ///
 /// \headerfile
@@ -3167,7 +3173,7 @@ _mm_srl_epi64(__m128i __a, __m128i __count)
   return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
 }
 
-/// \brief Compares each of the corresponding 8-bit values of the 128-bit
+/// Compares each of the corresponding 8-bit values of the 128-bit
 ///    integer vectors for equality. Each comparison yields 0x0 for false, 0xFF
 ///    for true.
 ///
@@ -3186,7 +3192,7 @@ _mm_cmpeq_epi8(__m128i __a, __m128i __b)
   return (__m128i)((__v16qi)__a == (__v16qi)__b);
 }
 
-/// \brief Compares each of the corresponding 16-bit values of the 128-bit
+/// Compares each of the corresponding 16-bit values of the 128-bit
 ///    integer vectors for equality. Each comparison yields 0x0 for false,
 ///    0xFFFF for true.
 ///
@@ -3205,7 +3211,7 @@ _mm_cmpeq_epi16(__m128i __a, __m128i __b)
   return (__m128i)((__v8hi)__a == (__v8hi)__b);
 }
 
-/// \brief Compares each of the corresponding 32-bit values of the 128-bit
+/// Compares each of the corresponding 32-bit values of the 128-bit
 ///    integer vectors for equality. Each comparison yields 0x0 for false,
 ///    0xFFFFFFFF for true.
 ///
@@ -3224,7 +3230,7 @@ _mm_cmpeq_epi32(__m128i __a, __m128i __b)
   return (__m128i)((__v4si)__a == (__v4si)__b);
 }
 
-/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
+/// Compares each of the corresponding signed 8-bit values of the 128-bit
 ///    integer vectors to determine if the values in the first operand are
 ///    greater than those in the second operand. Each comparison yields 0x0 for
 ///    false, 0xFF for true.
@@ -3246,7 +3252,7 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b)
   return (__m128i)((__v16qs)__a > (__v16qs)__b);
 }
 
-/// \brief Compares each of the corresponding signed 16-bit values of the
+/// Compares each of the corresponding signed 16-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
 ///    are greater than those in the second operand.
 ///
@@ -3267,7 +3273,7 @@ _mm_cmpgt_epi16(__m128i __a, __m128i __b)
   return (__m128i)((__v8hi)__a > (__v8hi)__b);
 }
 
-/// \brief Compares each of the corresponding signed 32-bit values of the
+/// Compares each of the corresponding signed 32-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
 ///    are greater than those in the second operand.
 ///
@@ -3288,7 +3294,7 @@ _mm_cmpgt_epi32(__m128i __a, __m128i __b)
   return (__m128i)((__v4si)__a > (__v4si)__b);
 }
 
-/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
+/// Compares each of the corresponding signed 8-bit values of the 128-bit
 ///    integer vectors to determine if the values in the first operand are less
 ///    than those in the second operand.
 ///
@@ -3309,7 +3315,7 @@ _mm_cmplt_epi8(__m128i __a, __m128i __b)
   return _mm_cmpgt_epi8(__b, __a);
 }
 
-/// \brief Compares each of the corresponding signed 16-bit values of the
+/// Compares each of the corresponding signed 16-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
 ///    are less than those in the second operand.
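The comparison intrinsics above return all-ones or all-zero lanes rather than booleans, which is exactly what makes the AND/ANDNOT/OR operations from earlier hunks useful for branchless selection. A sketch of the classic select idiom (my code, not from the header):

```c
#include <emmintrin.h>

/* max(a, b) per signed 16-bit lane, built only from the intrinsics above:
 * mask lanes are 0xFFFF where a > b and 0x0000 elsewhere. */
static __m128i max_epi16_select(__m128i a, __m128i b) {
  __m128i mask = _mm_cmpgt_epi16(a, b);
  return _mm_or_si128(_mm_and_si128(mask, a),      /* take a where a > b */
                      _mm_andnot_si128(mask, b));  /* take b elsewhere   */
}
```

SSE2 also provides a direct `_mm_max_epi16`, but the mask-and-combine pattern generalizes to any predicate these comparisons can express.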
/// @@ -3330,7 +3336,7 @@ _mm_cmplt_epi16(__m128i __a, __m128i __b) return _mm_cmpgt_epi16(__b, __a); } -/// \brief Compares each of the corresponding signed 32-bit values of the +/// Compares each of the corresponding signed 32-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are less than those in the second operand. /// @@ -3352,7 +3358,7 @@ _mm_cmplt_epi32(__m128i __a, __m128i __b) } #ifdef __x86_64__ -/// \brief Converts a 64-bit signed integer value from the second operand into a +/// Converts a 64-bit signed integer value from the second operand into a /// double-precision value and returns it in the lower element of a [2 x /// double] vector; the upper element of the returned vector is copied from /// the upper element of the first operand. @@ -3376,7 +3382,7 @@ _mm_cvtsi64_sd(__m128d __a, long long __b) return __a; } -/// \brief Converts the first (lower) element of a vector of [2 x double] into a +/// Converts the first (lower) element of a vector of [2 x double] into a /// 64-bit signed integer value, according to the current rounding mode. /// /// \headerfile @@ -3393,7 +3399,7 @@ _mm_cvtsd_si64(__m128d __a) return __builtin_ia32_cvtsd2si64((__v2df)__a); } -/// \brief Converts the first (lower) element of a vector of [2 x double] into a +/// Converts the first (lower) element of a vector of [2 x double] into a /// 64-bit signed integer value, truncating the result when it is inexact. /// /// \headerfile @@ -3412,7 +3418,7 @@ _mm_cvttsd_si64(__m128d __a) } #endif -/// \brief Converts a vector of [4 x i32] into a vector of [4 x float]. +/// Converts a vector of [4 x i32] into a vector of [4 x float]. /// /// \headerfile /// @@ -3424,10 +3430,10 @@ _mm_cvttsd_si64(__m128d __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { - return __builtin_ia32_cvtdq2ps((__v4si)__a); + return (__m128)__builtin_convertvector((__v4si)__a, __v4sf); } -/// \brief Converts a vector of [4 x float] into a vector of [4 x i32]. +/// Converts a vector of [4 x float] into a vector of [4 x i32]. /// /// \headerfile /// @@ -3443,7 +3449,7 @@ _mm_cvtps_epi32(__m128 __a) return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); } -/// \brief Converts a vector of [4 x float] into a vector of [4 x i32], +/// Converts a vector of [4 x float] into a vector of [4 x i32], /// truncating the result when it is inexact. /// /// \headerfile @@ -3460,7 +3466,7 @@ _mm_cvttps_epi32(__m128 __a) return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); } -/// \brief Returns a vector of [4 x i32] where the lowest element is the input +/// Returns a vector of [4 x i32] where the lowest element is the input /// operand and the remaining elements are zero. /// /// \headerfile @@ -3473,11 +3479,11 @@ _mm_cvttps_epi32(__m128 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { - return (__m128i)(__v4si){ __a, 0, 0, 0 }; + return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 }; } #ifdef __x86_64__ -/// \brief Returns a vector of [2 x i64] where the lower element is the input +/// Returns a vector of [2 x i64] where the lower element is the input /// operand and the upper element is zero. 
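`_mm_cvtepi32_ps` above now lowers to the generic `__builtin_convertvector` rather than a target-specific builtin, with identical semantics. In the other direction, the rounding (`cvt`) and truncating (`cvtt`) conversions documented above behave differently on halfway values; a small illustrative harness (not from the diff):

```c
#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  __m128 f = _mm_set_ps(2.5f, -1.5f, 1.5f, 0.5f);   /* element 0 is 0.5f */
  __m128i rounded   = _mm_cvtps_epi32(f);   /* current rounding mode, which is
                                               round-to-nearest-even by default */
  __m128i truncated = _mm_cvttps_epi32(f);  /* always truncates toward zero */
  int r[4], t[4];
  _mm_storeu_si128((__m128i *)r, rounded);
  _mm_storeu_si128((__m128i *)t, truncated);
  printf("%d %d %d %d\n", r[0], r[1], r[2], r[3]);  /* 0 2 -2 2 */
  printf("%d %d %d %d\n", t[0], t[1], t[2], t[3]);  /* 0 1 -1 2 */
  return 0;
}
```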
/// /// \headerfile @@ -3490,11 +3496,11 @@ _mm_cvtsi32_si128(int __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { - return (__m128i){ __a, 0 }; + return __extension__ (__m128i)(__v2di){ __a, 0 }; } #endif -/// \brief Moves the least significant 32 bits of a vector of [4 x i32] to a +/// Moves the least significant 32 bits of a vector of [4 x i32] to a /// 32-bit signed integer value. /// /// \headerfile @@ -3513,7 +3519,7 @@ _mm_cvtsi128_si32(__m128i __a) } #ifdef __x86_64__ -/// \brief Moves the least significant 64 bits of a vector of [2 x i64] to a +/// Moves the least significant 64 bits of a vector of [2 x i64] to a /// 64-bit signed integer value. /// /// \headerfile @@ -3531,7 +3537,7 @@ _mm_cvtsi128_si64(__m128i __a) } #endif -/// \brief Moves packed integer values from an aligned 128-bit memory location +/// Moves packed integer values from an aligned 128-bit memory location /// to elements in a 128-bit integer vector. /// /// \headerfile @@ -3547,7 +3553,7 @@ _mm_load_si128(__m128i const *__p) return *__p; } -/// \brief Moves packed integer values from an unaligned 128-bit memory location +/// Moves packed integer values from an unaligned 128-bit memory location /// to elements in a 128-bit integer vector. /// /// \headerfile @@ -3566,7 +3572,7 @@ _mm_loadu_si128(__m128i const *__p) return ((struct __loadu_si128*)__p)->__v; } -/// \brief Returns a vector of [2 x i64] where the lower element is taken from +/// Returns a vector of [2 x i64] where the lower element is taken from /// the lower element of the operand, and the upper element is zero. /// /// \headerfile @@ -3584,10 +3590,10 @@ _mm_loadl_epi64(__m128i const *__p) struct __mm_loadl_epi64_struct { long long __u; } __attribute__((__packed__, __may_alias__)); - return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0}; + return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0}; } -/// \brief Generates a 128-bit vector of [4 x i32] with unspecified content. +/// Generates a 128-bit vector of [4 x i32] with unspecified content. /// This could be used as an argument to another intrinsic function where the /// argument is required but the value is not actually used. /// @@ -3602,7 +3608,7 @@ _mm_undefined_si128(void) return (__m128i)__builtin_ia32_undef128(); } -/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with +/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with /// the specified 64-bit integer values. /// /// \headerfile @@ -3621,10 +3627,10 @@ _mm_undefined_si128(void) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0) { - return (__m128i){ __q0, __q1 }; + return __extension__ (__m128i)(__v2di){ __q0, __q1 }; } -/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with +/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with /// the specified 64-bit integer values. /// /// \headerfile @@ -3643,10 +3649,10 @@ _mm_set_epi64x(long long __q1, long long __q0) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0) { - return (__m128i){ (long long)__q0, (long long)__q1 }; + return _mm_set_epi64x((long long)__q1, (long long)__q0); } -/// \brief Initializes the 32-bit values in a 128-bit vector of [4 x i32] with +/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with /// the specified 32-bit integer values. 
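The `__packed__, __may_alias__` wrapper struct in `_mm_loadu_si128` above is what tells the compiler to emit an unaligned load; `_mm_load_si128` dereferences the pointer directly and therefore requires 16-byte alignment. A hedged usage sketch (function and names are mine):

```c
#include <emmintrin.h>

/* Sum 16-bit lanes of an arbitrarily aligned buffer. Since buf need not be
 * 16-byte aligned, only the unaligned load is safe here. */
static __m128i sum_epi16(const short *buf, int nvec) {
  __m128i acc = _mm_setzero_si128();
  for (int i = 0; i < nvec; ++i)
    acc = _mm_add_epi16(acc, _mm_loadu_si128((const __m128i *)(buf + 8 * i)));
  return acc;
}
```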
/// /// \headerfile @@ -3671,10 +3677,10 @@ _mm_set_epi64(__m64 __q1, __m64 __q0) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0) { - return (__m128i)(__v4si){ __i0, __i1, __i2, __i3}; + return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3}; } -/// \brief Initializes the 16-bit values in a 128-bit vector of [8 x i16] with +/// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with /// the specified 16-bit integer values. /// /// \headerfile @@ -3711,10 +3717,10 @@ _mm_set_epi32(int __i3, int __i2, int __i1, int __i0) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0) { - return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 }; + return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 }; } -/// \brief Initializes the 8-bit values in a 128-bit vector of [16 x i8] with +/// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with /// the specified 8-bit integer values. /// /// \headerfile @@ -3759,10 +3765,10 @@ _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { - return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 }; + return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 }; } -/// \brief Initializes both values in a 128-bit integer vector with the +/// Initializes both values in a 128-bit integer vector with the /// specified 64-bit integer value. /// /// \headerfile @@ -3778,10 +3784,10 @@ _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { - return (__m128i){ __q, __q }; + return _mm_set_epi64x(__q, __q); } -/// \brief Initializes both values in a 128-bit vector of [2 x i64] with the +/// Initializes both values in a 128-bit vector of [2 x i64] with the /// specified 64-bit value. /// /// \headerfile @@ -3797,10 +3803,10 @@ _mm_set1_epi64x(long long __q) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { - return (__m128i){ (long long)__q, (long long)__q }; + return _mm_set_epi64(__q, __q); } -/// \brief Initializes all values in a 128-bit vector of [4 x i32] with the +/// Initializes all values in a 128-bit vector of [4 x i32] with the /// specified 32-bit value. /// /// \headerfile @@ -3816,10 +3822,10 @@ _mm_set1_epi64(__m64 __q) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { - return (__m128i)(__v4si){ __i, __i, __i, __i }; + return _mm_set_epi32(__i, __i, __i, __i); } -/// \brief Initializes all values in a 128-bit vector of [8 x i16] with the +/// Initializes all values in a 128-bit vector of [8 x i16] with the /// specified 16-bit value. 
/// /// \headerfile @@ -3835,10 +3841,10 @@ _mm_set1_epi32(int __i) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { - return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w }; + return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); } -/// \brief Initializes all values in a 128-bit vector of [16 x i8] with the +/// Initializes all values in a 128-bit vector of [16 x i8] with the /// specified 8-bit value. /// /// \headerfile @@ -3854,10 +3860,10 @@ _mm_set1_epi16(short __w) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { - return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b }; + return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); } -/// \brief Constructs a 128-bit integer vector, initialized in reverse order +/// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 64-bit integral values. /// /// \headerfile @@ -3874,10 +3880,10 @@ _mm_set1_epi8(char __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1) { - return (__m128i){ (long long)__q0, (long long)__q1 }; + return _mm_set_epi64(__q1, __q0); } -/// \brief Constructs a 128-bit integer vector, initialized in reverse order +/// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 32-bit integral values. /// /// \headerfile @@ -3897,10 +3903,10 @@ _mm_setr_epi64(__m64 __q0, __m64 __q1) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) { - return (__m128i)(__v4si){ __i0, __i1, __i2, __i3}; + return _mm_set_epi32(__i3, __i2, __i1, __i0); } -/// \brief Constructs a 128-bit integer vector, initialized in reverse order +/// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 16-bit integral values. /// /// \headerfile @@ -3928,10 +3934,10 @@ _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7) { - return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 }; + return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); } -/// \brief Constructs a 128-bit integer vector, initialized in reverse order +/// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 8-bit integral values. /// /// \headerfile @@ -3975,10 +3981,10 @@ _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15) { - return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 }; + return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } -/// \brief Creates a 128-bit integer vector initialized to zero. +/// Creates a 128-bit integer vector initialized to zero. 
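The rewrites above funnel every `_mm_set1_*` and `_mm_setr_*` form through the corresponding `_mm_set_*` constructor, so argument ordering is now the only difference between the variants. An illustrative check (the assertions are mine, not the header's):

```c
#include <emmintrin.h>
#include <assert.h>

int main(void) {
  /* _mm_set_epi32 takes arguments high-to-low; _mm_setr_epi32 low-to-high. */
  __m128i a = _mm_set_epi32(3, 2, 1, 0);
  __m128i b = _mm_setr_epi32(0, 1, 2, 3);
  assert(_mm_movemask_epi8(_mm_cmpeq_epi32(a, b)) == 0xFFFF);

  /* _mm_set1_epi32 splats one value into all four lanes. */
  __m128i c = _mm_set1_epi32(7);
  assert(_mm_movemask_epi8(_mm_cmpeq_epi32(c, _mm_set_epi32(7, 7, 7, 7)))
         == 0xFFFF);
  return 0;
}
```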
 ///
 /// \headerfile
 ///
@@ -3989,10 +3995,10 @@ _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_setzero_si128(void)
 {
-  return (__m128i){ 0LL, 0LL };
+  return __extension__ (__m128i)(__v2di){ 0LL, 0LL };
 }
 
-/// \brief Stores a 128-bit integer vector to a memory location aligned on a
+/// Stores a 128-bit integer vector to a memory location aligned on a
 ///    128-bit boundary.
 ///
 /// \headerfile
@@ -4010,7 +4016,7 @@ _mm_store_si128(__m128i *__p, __m128i __b)
   *__p = __b;
 }
 
-/// \brief Stores a 128-bit integer vector to an unaligned memory location.
+/// Stores a 128-bit integer vector to an unaligned memory location.
 ///
 /// \headerfile
 ///
@@ -4029,7 +4035,70 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
   ((struct __storeu_si128*)__p)->__v = __b;
 }
 
-/// \brief Moves bytes selected by the mask from the first operand to the
+/// Stores a 64-bit integer value from the low element of a 128-bit integer
+///    vector.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VMOVQ / MOVQ instruction.
+///
+/// \param __p
+///    A pointer to a 64-bit memory location. The address of the memory
+///    location does not have to be aligned.
+/// \param __b
+///    A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si64(void const *__p, __m128i __b)
+{
+  struct __storeu_si64 {
+    long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
+}
+
+/// Stores a 32-bit integer value from the low element of a 128-bit integer
+///    vector.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VMOVD / MOVD instruction.
+///
+/// \param __p
+///    A pointer to a 32-bit memory location. The address of the memory
+///    location does not have to be aligned.
+/// \param __b
+///    A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si32(void const *__p, __m128i __b)
+{
+  struct __storeu_si32 {
+    int __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
+}
+
+/// Stores a 16-bit integer value from the low element of a 128-bit integer
+///    vector.
+///
+/// \headerfile
+///
+/// This intrinsic does not correspond to a specific instruction.
+///
+/// \param __p
+///    A pointer to a 16-bit memory location. The address of the memory
+///    location does not have to be aligned.
+/// \param __b
+///    A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si16(void const *__p, __m128i __b)
+{
+  struct __storeu_si16 {
+    short __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
+}
+
+/// Moves bytes selected by the mask from the first operand to the
 ///    specified unaligned memory location. When a mask bit is 1, the
 ///    corresponding byte is written, otherwise it is not written.
 ///
@@ -4056,7 +4125,7 @@ _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
   __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
 }
 
-/// \brief Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to
+/// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to
 ///    a memory location.
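The three `_mm_storeu_siNN` helpers added above are new in this header drop; previously a narrow unaligned store needed `_mm_storel_epi64` or a memcpy. A sketch of the intended use (note the header declares the destination `void const *`, reproduced faithfully from upstream; a caller's non-const buffer converts fine):

```c
#include <emmintrin.h>
#include <string.h>

/* Write only the low 4 bytes of a vector into a byte stream at an
 * arbitrary (possibly unaligned) offset. */
static void put_low_u32(unsigned char *dst, __m128i v) {
  _mm_storeu_si32(dst, v);   /* low 32 bits, no alignment required */
}

/* The pre-clang-8 equivalent for comparison (assumption: the usual
 * memcpy idiom, not anything prescribed by the header). */
static void put_low_u32_memcpy(unsigned char *dst, __m128i v) {
  int lo = _mm_cvtsi128_si32(v);
  memcpy(dst, &lo, sizeof lo);
}
```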
 ///
 /// \headerfile
@@ -4078,7 +4147,7 @@ _mm_storel_epi64(__m128i *__p, __m128i __a)
   ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
 }
 
-/// \brief Stores a 128-bit floating point vector of [2 x double] to a 128-bit
+/// Stores a 128-bit floating point vector of [2 x double] to a 128-bit
 ///    aligned memory location.
 ///
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
@@ -4098,7 +4167,7 @@ _mm_stream_pd(double *__p, __m128d __a)
   __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
 }
 
-/// \brief Stores a 128-bit integer vector to a 128-bit aligned memory location.
+/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
 ///
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
@@ -4117,7 +4186,7 @@ _mm_stream_si128(__m128i *__p, __m128i __a)
   __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
 }
 
-/// \brief Stores a 32-bit integer value in the specified memory location.
+/// Stores a 32-bit integer value in the specified memory location.
 ///
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
@@ -4130,14 +4199,14 @@ _mm_stream_si128(__m128i *__p, __m128i __a)
 ///    A pointer to the 32-bit memory location used to store the value.
 /// \param __a
 ///    A 32-bit integer containing the value to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
 _mm_stream_si32(int *__p, int __a)
 {
   __builtin_ia32_movnti(__p, __a);
 }
 
 #ifdef __x86_64__
-/// \brief Stores a 64-bit integer value in the specified memory location.
+/// Stores a 64-bit integer value in the specified memory location.
 ///
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
@@ -4150,7 +4219,7 @@ _mm_stream_si32(int *__p, int __a)
 ///    A pointer to the 64-bit memory location used to store the value.
 /// \param __a
 ///    A 64-bit integer containing the value to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
 _mm_stream_si64(long long *__p, long long __a)
 {
   __builtin_ia32_movnti64(__p, __a);
@@ -4161,7 +4230,7 @@ _mm_stream_si64(long long *__p, long long __a)
 extern "C" {
 #endif
 
-/// \brief The cache line containing \a __p is flushed and invalidated from all
+/// The cache line containing \a __p is flushed and invalidated from all
 ///    caches in the coherency domain.
 ///
 /// \headerfile
@@ -4173,7 +4242,7 @@ extern "C" {
 ///    flushed.
 void _mm_clflush(void const * __p);
 
-/// \brief Forces strong memory ordering (serialization) between load
+/// Forces strong memory ordering (serialization) between load
 ///    instructions preceding this instruction and load instructions following
 ///    this instruction, ensuring the system completes all previous loads before
 ///    executing subsequent loads.
@@ -4184,7 +4253,7 @@ void _mm_clflush(void const * __p);
 ///
 void _mm_lfence(void);
 
-/// \brief Forces strong memory ordering (serialization) between load and store
+/// Forces strong memory ordering (serialization) between load and store
 ///    instructions preceding this instruction and load and store instructions
 ///    following this instruction, ensuring that the system completes all
 ///    previous memory accesses before executing subsequent memory accesses.
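The streaming stores above bypass the cache, and because non-temporal stores are weakly ordered, a fence is needed before another thread can safely observe the data. A hedged sketch of the usual pattern (`_mm_sfence` would be the lighter choice but lives in xmmintrin.h; using the `_mm_mfence` declared here keeps the example within this header):

```c
#include <emmintrin.h>

/* Fill a buffer without polluting the cache, then publish it.
 * Preconditions: buf is 16-byte aligned and n is a multiple of 4 ints. */
static void stream_fill(int *buf, int n, int value) {
  __m128i v = _mm_set1_epi32(value);
  for (int i = 0; i < n; i += 4)
    _mm_stream_si128((__m128i *)(buf + i), v);
  _mm_mfence();  /* order the non-temporal stores before later accesses */
}
```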
@@ -4199,7 +4268,7 @@ void _mm_mfence(void);
 } // extern "C"
 #endif
 
-/// \brief Converts 16-bit signed integers from both 128-bit integer vector
+/// Converts 16-bit signed integers from both 128-bit integer vector
 ///    operands into 8-bit signed integers, and packs the results into the
 ///    destination. Positive values greater than 0x7F are saturated to 0x7F.
 ///    Negative values less than 0x80 are saturated to 0x80.
@@ -4227,7 +4296,7 @@ _mm_packs_epi16(__m128i __a, __m128i __b)
   return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
 }
 
-/// \brief Converts 32-bit signed integers from both 128-bit integer vector
+/// Converts 32-bit signed integers from both 128-bit integer vector
 ///    operands into 16-bit signed integers, and packs the results into the
 ///    destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.
 ///    Negative values less than 0x8000 are saturated to 0x8000.
@@ -4255,7 +4324,7 @@ _mm_packs_epi32(__m128i __a, __m128i __b)
   return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
 }
 
-/// \brief Converts 16-bit signed integers from both 128-bit integer vector
+/// Converts 16-bit signed integers from both 128-bit integer vector
 ///    operands into 8-bit unsigned integers, and packs the results into the
 ///    destination. Values greater than 0xFF are saturated to 0xFF. Values less
 ///    than 0x00 are saturated to 0x00.
@@ -4283,7 +4352,7 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
   return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
 }
 
-/// \brief Extracts 16 bits from a 128-bit integer vector of [8 x i16], using
+/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using
 ///    the immediate-value parameter as a selector.
 ///
 /// \headerfile
@@ -4305,14 +4374,11 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
 ///    111: assign values from bits [127:112] of \a __a.
 /// \returns An integer, whose lower 16 bits are selected from the 128-bit
 ///    integer vector parameter and the remaining bits are assigned zeros.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_extract_epi16(__m128i __a, int __imm)
-{
-  __v8hi __b = (__v8hi)__a;
-  return (unsigned short)__b[__imm & 7];
-}
+#define _mm_extract_epi16(a, imm) \
+  (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
+                                                   (int)(imm))
 
-/// \brief Constructs a 128-bit integer vector by first making a copy of the
+/// Constructs a 128-bit integer vector by first making a copy of the
 ///    128-bit integer vector parameter, and then inserting the lower 16 bits
 ///    of an integer parameter into an offset specified by the immediate-value
 ///    parameter.
@@ -4332,15 +4398,11 @@ _mm_extract_epi16(__m128i __a, int __imm)
 ///    An immediate value specifying the bit offset in the result at which the
 ///    lower 16 bits of \a __b are written.
 /// \returns A 128-bit integer vector containing the constructed values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_insert_epi16(__m128i __a, int __b, int __imm)
-{
-  __v8hi __c = (__v8hi)__a;
-  __c[__imm & 7] = __b;
-  return (__m128i)__c;
-}
+#define _mm_insert_epi16(a, b, imm) \
+  (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
+                                       (int)(imm))
 
-/// \brief Copies the values of the most significant bits from each 8-bit
+/// Copies the values of the most significant bits from each 8-bit
 ///    element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
 ///    value, zero-extends the value, and writes it to the destination.
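Because `_mm_extract_epi16` and `_mm_insert_epi16` above become macros over `__builtin_ia32_vec_ext_v8hi` / `__builtin_ia32_vec_set_v8hi`, the lane index must now be a compile-time constant, where the old inline functions accepted a runtime `__imm`. Illustrative usage, plus a sketch of what stops compiling:

```c
#include <emmintrin.h>

static int third_lane(__m128i v) {
  return _mm_extract_epi16(v, 3);    /* OK: literal index */
}

static __m128i poke_lane0(__m128i v, short x) {
  return _mm_insert_epi16(v, x, 0);  /* OK: literal index */
}

/* With the clang 8 headers the following is rejected at compile time
 * (the builtin requires a constant index), where the clang 7 inline
 * function silently accepted a runtime value:
 *
 *   int lane(__m128i v, int i) { return _mm_extract_epi16(v, i); }
 */
```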
 ///
@@ -4358,7 +4420,7 @@ _mm_movemask_epi8(__m128i __a)
   return __builtin_ia32_pmovmskb128((__v16qi)__a);
 }
 
-/// \brief Constructs a 128-bit integer vector by shuffling four 32-bit
+/// Constructs a 128-bit integer vector by shuffling four 32-bit
 ///    elements of a 128-bit integer vector parameter, using the immediate-value
 ///    parameter as a specifier.
 ///
@@ -4386,13 +4448,10 @@ _mm_movemask_epi8(__m128i __a)
 ///    10: assign values from bits [95:64] of \a a. \n
 ///    11: assign values from bits [127:96] of \a a.
 /// \returns A 128-bit integer vector containing the shuffled values.
-#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
-  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
-                                   (__v4si)_mm_undefined_si128(), \
-                                   ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
-                                   ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); })
+#define _mm_shuffle_epi32(a, imm) \
+  (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))
 
-/// \brief Constructs a 128-bit integer vector by shuffling four lower 16-bit
+/// Constructs a 128-bit integer vector by shuffling four lower 16-bit
 ///    elements of a 128-bit integer vector of [8 x i16], using the immediate
 ///    value parameter as a specifier.
 ///
@@ -4419,14 +4478,10 @@ _mm_movemask_epi8(__m128i __a)
 ///    10: assign values from bits [47:32] of \a a. \n
 ///    11: assign values from bits [63:48] of \a a. \n
 /// \returns A 128-bit integer vector containing the shuffled values.
-#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
-  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
-                                   (__v8hi)_mm_undefined_si128(), \
-                                   ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
-                                   ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \
-                                   4, 5, 6, 7); })
-
-/// \brief Constructs a 128-bit integer vector by shuffling four upper 16-bit
+#define _mm_shufflelo_epi16(a, imm) \
+  (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))
+
+/// Constructs a 128-bit integer vector by shuffling four upper 16-bit
 ///    elements of a 128-bit integer vector of [8 x i16], using the immediate
 ///    value parameter as a specifier.
 ///
@@ -4453,16 +4508,10 @@ _mm_movemask_epi8(__m128i __a)
 ///    10: assign values from bits [111:96] of \a a. \n
 ///    11: assign values from bits [127:112] of \a a. \n
 /// \returns A 128-bit integer vector containing the shuffled values.
-#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
-  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
-                                   (__v8hi)_mm_undefined_si128(), \
-                                   0, 1, 2, 3, \
-                                   4 + (((imm) >> 0) & 0x3), \
-                                   4 + (((imm) >> 2) & 0x3), \
-                                   4 + (((imm) >> 4) & 0x3), \
-                                   4 + (((imm) >> 6) & 0x3)); })
-
-/// \brief Unpacks the high-order (index 8-15) values from two 128-bit vectors
+#define _mm_shufflehi_epi16(a, imm) \
+  (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))
+
+/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
 ///    of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
 ///
 /// \headerfile
@@ -4497,7 +4546,7 @@ _mm_unpackhi_epi8(__m128i __a, __m128i __b)
   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
 }
 
-/// \brief Unpacks the high-order (index 4-7) values from two 128-bit vectors of
+/// Unpacks the high-order (index 4-7) values from two 128-bit vectors of
 ///    [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].
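The shuffle macros above keep their immediate encoding of two bits per destination lane, low lane first. The `_MM_SHUFFLE` helper builds that constant (it is defined in xmmintrin.h, which emmintrin.h includes); a sketch:

```c
#include <emmintrin.h>

/* Reverse the four 32-bit lanes of v. _MM_SHUFFLE(3, 2, 1, 0) is the
 * identity; _MM_SHUFFLE(0, 1, 2, 3) selects old lane 3 into new lane 0,
 * old lane 2 into new lane 1, and so on. */
static __m128i reverse_epi32(__m128i v) {
  return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));
}

/* Broadcast lane 0 into all four lanes. */
static __m128i splat0_epi32(__m128i v) {
  return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0));
}
```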
/// /// \headerfile @@ -4524,7 +4573,7 @@ _mm_unpackhi_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); } -/// \brief Unpacks the high-order (index 2,3) values from two 128-bit vectors of +/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. /// /// \headerfile @@ -4547,7 +4596,7 @@ _mm_unpackhi_epi32(__m128i __a, __m128i __b) return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3); } -/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of +/// Unpacks the high-order 64-bit elements from two 128-bit vectors of /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. /// /// \headerfile @@ -4568,7 +4617,7 @@ _mm_unpackhi_epi64(__m128i __a, __m128i __b) return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1); } -/// \brief Unpacks the low-order (index 0-7) values from two 128-bit vectors of +/// Unpacks the low-order (index 0-7) values from two 128-bit vectors of /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. /// /// \headerfile @@ -4603,7 +4652,7 @@ _mm_unpacklo_epi8(__m128i __a, __m128i __b) return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); } -/// \brief Unpacks the low-order (index 0-3) values from each of the two 128-bit +/// Unpacks the low-order (index 0-3) values from each of the two 128-bit /// vectors of [8 x i16] and interleaves them into a 128-bit vector of /// [8 x i16]. /// @@ -4631,7 +4680,7 @@ _mm_unpacklo_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); } -/// \brief Unpacks the low-order (index 0,1) values from two 128-bit vectors of +/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. /// /// \headerfile @@ -4654,7 +4703,7 @@ _mm_unpacklo_epi32(__m128i __a, __m128i __b) return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1); } -/// \brief Unpacks the low-order 64-bit elements from two 128-bit vectors of +/// Unpacks the low-order 64-bit elements from two 128-bit vectors of /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. /// /// \headerfile @@ -4675,7 +4724,7 @@ _mm_unpacklo_epi64(__m128i __a, __m128i __b) return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0); } -/// \brief Returns the lower 64 bits of a 128-bit integer vector as a 64-bit +/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit /// integer. /// /// \headerfile @@ -4692,7 +4741,7 @@ _mm_movepi64_pi64(__m128i __a) return (__m64)__a[0]; } -/// \brief Moves the 64-bit operand to a 128-bit integer vector, zeroing the +/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the /// upper bits. /// /// \headerfile @@ -4706,10 +4755,10 @@ _mm_movepi64_pi64(__m128i __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { - return (__m128i){ (long long)__a, 0 }; + return __extension__ (__m128i)(__v2di){ (long long)__a, 0 }; } -/// \brief Moves the lower 64 bits of a 128-bit integer vector to a 128-bit +/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit /// integer vector, zeroing the upper bits. 
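Interleaving with a zero vector, using the unpack operations documented above, is the classic SSE2 way to widen unsigned lanes before SSE4.1 introduced `pmovzx`; a short sketch of the idiom (mine, not the header's):

```c
#include <emmintrin.h>

/* Zero-extend the 8 low unsigned bytes of v into 16-bit lanes. */
static __m128i widen_lo_u8_to_u16(__m128i v) {
  return _mm_unpacklo_epi8(v, _mm_setzero_si128());
}

/* Same for the 8 high bytes. */
static __m128i widen_hi_u8_to_u16(__m128i v) {
  return _mm_unpackhi_epi8(v, _mm_setzero_si128());
}
```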
/// /// \headerfile @@ -4724,10 +4773,10 @@ _mm_movpi64_epi64(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { - return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2); + return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); } -/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of +/// Unpacks the high-order 64-bit elements from two 128-bit vectors of /// [2 x double] and interleaves them into a 128-bit vector of [2 x /// double]. /// @@ -4748,7 +4797,7 @@ _mm_unpackhi_pd(__m128d __a, __m128d __b) return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1); } -/// \brief Unpacks the low-order 64-bit elements from two 128-bit vectors +/// Unpacks the low-order 64-bit elements from two 128-bit vectors /// of [2 x double] and interleaves them into a 128-bit vector of [2 x /// double]. /// @@ -4769,7 +4818,7 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0); } -/// \brief Extracts the sign bits of the double-precision values in the 128-bit +/// Extracts the sign bits of the double-precision values in the 128-bit /// vector of [2 x double], zero-extends the value, and writes it to the /// low-order bits of the destination. /// @@ -4789,7 +4838,7 @@ _mm_movemask_pd(__m128d __a) } -/// \brief Constructs a 128-bit floating-point vector of [2 x double] from two +/// Constructs a 128-bit floating-point vector of [2 x double] from two /// 128-bit vector parameters of [2 x double], using the immediate-value /// parameter as a specifier. /// @@ -4813,12 +4862,11 @@ _mm_movemask_pd(__m128d __a) /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n /// \returns A 128-bit vector of [2 x double] containing the shuffled values. -#define _mm_shuffle_pd(a, b, i) __extension__ ({ \ - (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ - 0 + (((i) >> 0) & 0x1), \ - 2 + (((i) >> 1) & 0x1)); }) +#define _mm_shuffle_pd(a, b, i) \ + (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ + (int)(i)) -/// \brief Casts a 128-bit floating-point vector of [2 x double] into a 128-bit +/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// floating-point vector of [4 x float]. /// /// \headerfile @@ -4835,7 +4883,7 @@ _mm_castpd_ps(__m128d __a) return (__m128)__a; } -/// \brief Casts a 128-bit floating-point vector of [2 x double] into a 128-bit +/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// integer vector. /// /// \headerfile @@ -4852,7 +4900,7 @@ _mm_castpd_si128(__m128d __a) return (__m128i)__a; } -/// \brief Casts a 128-bit floating-point vector of [4 x float] into a 128-bit +/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit /// floating-point vector of [2 x double]. /// /// \headerfile @@ -4869,7 +4917,7 @@ _mm_castps_pd(__m128 __a) return (__m128d)__a; } -/// \brief Casts a 128-bit floating-point vector of [4 x float] into a 128-bit +/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit /// integer vector. /// /// \headerfile @@ -4886,7 +4934,7 @@ _mm_castps_si128(__m128 __a) return (__m128i)__a; } -/// \brief Casts a 128-bit integer vector into a 128-bit floating-point vector +/// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of [4 x float]. 
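The `_mm_cast*` functions above are free: they reinterpret the 128-bit register without converting any values. Combined with the integer bitwise operations, that enables sign-bit tricks on floating-point data, for example an absolute value for doubles (an illustrative idiom, not from the header):

```c
#include <emmintrin.h>

/* Clear the sign bit of both double lanes via the integer domain. */
static __m128d abs_pd(__m128d x) {
  const __m128i sign_mask = _mm_set_epi64x(0x7FFFFFFFFFFFFFFFLL,
                                           0x7FFFFFFFFFFFFFFFLL);
  return _mm_castsi128_pd(_mm_and_si128(_mm_castpd_si128(x), sign_mask));
}
```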
 ///
 /// \headerfile
@@ -4903,7 +4951,7 @@ _mm_castsi128_ps(__m128i __a)
   return (__m128)__a;
 }
 
-/// \brief Casts a 128-bit integer vector into a 128-bit floating-point vector
+/// Casts a 128-bit integer vector into a 128-bit floating-point vector
 ///    of [2 x double].
 ///
 /// \headerfile
@@ -4924,7 +4972,7 @@ extern "C" {
 #endif
 
-/// \brief Indicates that a spin loop is being executed for the purposes of
+/// Indicates that a spin loop is being executed for the purposes of
 ///    optimizing power consumption during the loop.
 ///
 /// \headerfile
 ///
@@ -4937,6 +4985,7 @@ void _mm_pause(void);
 } // extern "C"
 #endif
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_MMX
 
 #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
diff --git a/lib/clang/7.0.0/include/f16cintrin.h b/lib/clang/8.0.0/include/f16cintrin.h
similarity index 60%
rename from lib/clang/7.0.0/include/f16cintrin.h
rename to lib/clang/8.0.0/include/f16cintrin.h
index b796cc8..3d35f28 100644
--- a/lib/clang/7.0.0/include/f16cintrin.h
+++ b/lib/clang/8.0.0/include/f16cintrin.h
@@ -21,18 +21,25 @@
  *===-----------------------------------------------------------------------===
  */
 
-#if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H
-#error "Never use directly; include instead."
+#if !defined __IMMINTRIN_H
+#error "Never use directly; include instead."
 #endif
 
 #ifndef __F16CINTRIN_H
 #define __F16CINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
-  __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
+#define __DEFAULT_FN_ATTRS128 \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
 
-/// \brief Converts a 16-bit half-precision float value into a 32-bit float
+/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
+ * but that's because icc can emulate these without f16c using a library call.
+ * Since we don't do that let's leave these in f16cintrin.h.
+ */
+
+/// Converts a 16-bit half-precision float value into a 32-bit float
 ///    value.
 ///
 /// \headerfile
@@ -42,7 +49,7 @@
 /// \param __a
 ///    A 16-bit half-precision float value.
 /// \returns The converted 32-bit float value.
-static __inline float __DEFAULT_FN_ATTRS
+static __inline float __DEFAULT_FN_ATTRS128
 _cvtsh_ss(unsigned short __a)
 {
   __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
@@ -50,7 +57,7 @@ _cvtsh_ss(unsigned short __a)
   return r[0];
 }
 
-/// \brief Converts a 32-bit single-precision float value to a 16-bit
+/// Converts a 32-bit single-precision float value to a 16-bit
 ///    half-precision float value.
 ///
 /// \headerfile
@@ -72,11 +79,11 @@ _cvtsh_ss(unsigned short __a)
 ///    011: Truncate \n
 ///    1XX: Use MXCSR.RC for rounding
 /// \returns The converted 16-bit half-precision float value.
-#define _cvtss_sh(a, imm) __extension__ ({ \
+#define _cvtss_sh(a, imm) \
   (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
-                                                     (imm)))[0]); })
+                                                     (imm)))[0])
 
-/// \brief Converts a 128-bit vector containing 32-bit float values into a
+/// Converts a 128-bit vector containing 32-bit float values into a
 ///    128-bit vector containing 16-bit half-precision float values.
 ///
 /// \headerfile
@@ -99,10 +106,10 @@ _cvtsh_ss(unsigned short __a)
 /// \returns A 128-bit vector containing converted 16-bit half-precision float
 ///    values. The lower 64 bits are used to store the converted 16-bit
 ///    half-precision floating-point values.
-#define _mm_cvtps_ph(a, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); })
+#define _mm_cvtps_ph(a, imm) \
+  (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))
 
-/// \brief Converts a 128-bit vector containing 16-bit half-precision float
+/// Converts a 128-bit vector containing 16-bit half-precision float
 ///    values into a 128-bit vector containing 32-bit float values.
 ///
 /// \headerfile
@@ -113,12 +120,57 @@ _cvtsh_ss(unsigned short __a)
 ///    A 128-bit vector containing 16-bit half-precision float values. The lower
 ///    64 bits are used in the conversion.
 /// \returns A 128-bit vector of [4 x float] containing converted float values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtph_ps(__m128i __a)
 {
   return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
 }
 
-#undef __DEFAULT_FN_ATTRS
+/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
+///    containing 16-bit half-precision float values.
+///
+/// \headerfile
+///
+/// \code
+/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
+/// \endcode
+///
+/// This intrinsic corresponds to the VCVTPS2PH instruction.
+///
+/// \param a
+///    A 256-bit vector containing 32-bit single-precision float values to be
+///    converted to 16-bit half-precision float values.
+/// \param imm
+///    An immediate value controlling rounding using bits [2:0]: \n
+///    000: Nearest \n
+///    001: Down \n
+///    010: Up \n
+///    011: Truncate \n
+///    1XX: Use MXCSR.RC for rounding
+/// \returns A 128-bit vector containing the converted 16-bit half-precision
+///    float values.
+#define _mm256_cvtps_ph(a, imm) \
+  (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))
+
+/// Converts a 128-bit vector containing 16-bit half-precision float
+///    values into a 256-bit vector of [8 x float].
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VCVTPH2PS instruction.
+///
+/// \param __a
+///    A 128-bit vector containing 16-bit half-precision float values to be
+///    converted to 32-bit single-precision float values.
+/// \returns A vector of [8 x float] containing the converted 32-bit
+///    single-precision float values.
+static __inline __m256 __DEFAULT_FN_ATTRS256
+_mm256_cvtph_ps(__m128i __a)
+{
+  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif /* __F16CINTRIN_H */
diff --git a/lib/clang/7.0.0/include/float.h b/lib/clang/8.0.0/include/float.h
similarity index 94%
rename from lib/clang/7.0.0/include/float.h
rename to lib/clang/8.0.0/include/float.h
index 44d4d05..56215cd 100644
--- a/lib/clang/7.0.0/include/float.h
+++ b/lib/clang/8.0.0/include/float.h
@@ -21,8 +21,8 @@
  *===-----------------------------------------------------------------------===
  */
 
-#ifndef __FLOAT_H
-#define __FLOAT_H
+#ifndef __CLANG_FLOAT_H
+#define __CLANG_FLOAT_H
 
 /* If we're on MinGW, fall back to the system's float.h, which might have
  * additional definitions provided for Windows.
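Returning to the F16C conversions shown just above: the new 256-bit pair mirrors the existing 128-bit one, and the scalar helpers round-trip a float through half precision. A usage sketch (compile with `-mf16c`; the flag and the harness are assumptions, not part of the diff, and f16cintrin.h must now be reached through immintrin.h per the #error guard above):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* Round to nearest (imm bits [2:0] = 000), as documented above. */
  unsigned short h = _cvtss_sh(3.14159f, 0);
  float back = _cvtsh_ss(h);
  printf("%f\n", back);   /* ~3.140625: half precision keeps ~3 digits */

  /* 8 floats -> 8 halves -> 8 floats with the new 256-bit variants. */
  __m256 v = _mm256_set1_ps(1.5f);
  __m128i half = _mm256_cvtps_ph(v, 0);
  __m256 restored = _mm256_cvtph_ps(half);
  (void)restored;
  return 0;
}
```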
@@ -85,6 +85,9 @@ # undef FLT_DECIMAL_DIG # undef DBL_DECIMAL_DIG # undef LDBL_DECIMAL_DIG +# undef FLT_HAS_SUBNORM +# undef DBL_HAS_SUBNORM +# undef LDBL_HAS_SUBNORM # endif #endif @@ -141,6 +144,9 @@ # define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ # define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ # define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ +# define FLT_HAS_SUBNORM __FLT_HAS_DENORM__ +# define DBL_HAS_SUBNORM __DBL_HAS_DENORM__ +# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__ #endif #ifdef __STDC_WANT_IEC_60559_TYPES_EXT__ @@ -157,4 +163,4 @@ # define FLT16_TRUE_MIN __FLT16_TRUE_MIN__ #endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */ -#endif /* __FLOAT_H */ +#endif /* __CLANG_FLOAT_H */ diff --git a/lib/clang/7.0.0/include/fma4intrin.h b/lib/clang/8.0.0/include/fma4intrin.h similarity index 76% rename from lib/clang/7.0.0/include/fma4intrin.h rename to lib/clang/8.0.0/include/fma4intrin.h index 962b1a6..7bae2f4 100644 --- a/lib/clang/7.0.0/include/fma4intrin.h +++ b/lib/clang/8.0.0/include/fma4intrin.h @@ -31,200 +31,202 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256))) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS 
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmacc_ps(__m256 __A, 
__m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __FMA4INTRIN_H */ diff --git a/lib/clang/7.0.0/include/fmaintrin.h b/lib/clang/8.0.0/include/fmaintrin.h similarity index 75% rename from lib/clang/7.0.0/include/fmaintrin.h rename to lib/clang/8.0.0/include/fmaintrin.h index 478a0ac..094d13a 100644 --- a/lib/clang/7.0.0/include/fmaintrin.h +++ b/lib/clang/8.0.0/include/fmaintrin.h @@ -1,4 +1,4 @@ -/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== +/*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -29,200 +29,202 @@ #define __FMAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, 
-(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static 
__inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __FMAINTRIN_H */ diff --git a/lib/clang/7.0.0/include/fxsrintrin.h b/lib/clang/8.0.0/include/fxsrintrin.h similarity index 87% rename from lib/clang/7.0.0/include/fxsrintrin.h rename to lib/clang/8.0.0/include/fxsrintrin.h index 786081c..704b5ad 100644 --- a/lib/clang/7.0.0/include/fxsrintrin.h +++ b/lib/clang/8.0.0/include/fxsrintrin.h @@ -30,7 +30,7 @@ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fxsr"))) -/// \brief Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte +/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte /// memory region pointed to by the input parameter \a __p. /// /// \headerfile @@ -43,10 +43,10 @@ static __inline__ void __DEFAULT_FN_ATTRS _fxsave(void *__p) { - return __builtin_ia32_fxsave(__p); + __builtin_ia32_fxsave(__p); } -/// \brief Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte +/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte /// memory region pointed to by the input parameter \a __p. The contents of /// this memory region should have been written to by a previous \c _fxsave /// or \c _fxsave64 intrinsic. @@ -61,11 +61,11 @@ _fxsave(void *__p) static __inline__ void __DEFAULT_FN_ATTRS _fxrstor(void *__p) { - return __builtin_ia32_fxrstor(__p); + __builtin_ia32_fxrstor(__p); } #ifdef __x86_64__ -/// \brief Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte +/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte /// memory region pointed to by the input parameter \a __p. /// /// \headerfile @@ -78,10 +78,10 @@ _fxrstor(void *__p) static __inline__ void __DEFAULT_FN_ATTRS _fxsave64(void *__p) { - return __builtin_ia32_fxsave64(__p); + __builtin_ia32_fxsave64(__p); } -/// \brief Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte +/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte /// memory region pointed to by the input parameter \a __p. The contents of /// this memory region should have been written to by a previous \c _fxsave /// or \c _fxsave64 intrinsic. 
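The fmaintrin.h hunks above all reduce to one fused multiply-add builtin with different operand signs. As a scalar reference for the four sign variants (a sketch of the per-lane semantics, with illustrative function names):

```c
/* Per-lane meaning of the intrinsics above: the builtin computes a fused
 * multiply-add, and the variants only negate selected operands. */
static float ref_fmadd (float a, float b, float c) { return  (a * b) + c; }
static float ref_fmsub (float a, float b, float c) { return  (a * b) - c; }
static float ref_fnmadd(float a, float b, float c) { return -(a * b) + c; }
static float ref_fnmsub(float a, float b, float c) { return -(a * b) - c; }
```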
@@ -96,7 +96,7 @@ _fxsave64(void *__p) static __inline__ void __DEFAULT_FN_ATTRS _fxrstor64(void *__p) { - return __builtin_ia32_fxrstor64(__p); + __builtin_ia32_fxrstor64(__p); } #endif diff --git a/lib/clang/7.0.0/include/gfniintrin.h b/lib/clang/8.0.0/include/gfniintrin.h similarity index 65% rename from lib/clang/7.0.0/include/gfniintrin.h rename to lib/clang/8.0.0/include/gfniintrin.h index 20fadcc..804d4f3 100644 --- a/lib/clang/7.0.0/include/gfniintrin.h +++ b/lib/clang/8.0.0/include/gfniintrin.h @@ -29,104 +29,108 @@ #define __GFNIINTRIN_H -#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v16qi)(__m128i)(S)); }) + (__v16qi)(__m128i)(S)) -#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v32qi)(__m256i)(S)); }) + (__v32qi)(__m256i)(S)) -#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v64qi)(__m512i)(S)); }) + (__v64qi)(__m512i)(S)) -#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ - (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(), \ - U, A, B, I); }) +#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ + (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \ + U, A, B, I) -#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm_gf2p8affine_epi64_epi8(A, B, I) \ (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ - (__v16qi)(__m128i)(S)); }) + 
(__v16qi)(__m128i)(S)) -#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \ (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ - (__v32qi)(__m256i)(S)); }) + (__v32qi)(__m256i)(S)) -#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \ (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \ - (__v64qi)(__m512i)(S)); }) + (__v64qi)(__m512i)(S)) -#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ - (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(), \ - U, A, B, I); }) +#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ + (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \ + U, A, B, I) /* Default attributes for simple form (no masking). */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128))) + +/* Default attributes for YMM unmasked form. */ +#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256))) /* Default attributes for ZMM forms. */ -#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"))) +#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512))) /* Default attributes for VLX forms. 
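A second recurring edit in the gfniintrin.h macros above is dropping the GNU statement-expression wrapper. The same transformation on a toy macro, as a sketch (`SQUARE_*` are illustrative names):

```c
/* Old form: a GNU statement expression ({ ... }) wrapping the result. */
#define SQUARE_OLD(x) __extension__ ({ (int)((x) * (x)); })

/* New form: a plain parenthesized expression. Same value, but it also
 * works where statement expressions are rejected, e.g. at file scope
 * or inside C++ constant expressions. */
#define SQUARE_NEW(x) ((int)((x) * (x)))
```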
*/ -#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"))) +#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) @@ -135,7 +139,7 @@ _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) (__v16qi) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128(__U, @@ -143,21 +147,21 @@ _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi) __S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS_Y _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, (__v32qi) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256(__U, @@ -165,21 +169,21 @@ _mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi) __S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_F +static __inline__ __m512i __DEFAULT_FN_ATTRS_Z _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A, (__v64qi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_F +static __inline__ __m512i __DEFAULT_FN_ATTRS_Z _mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_selectb_512(__U, @@ -187,16 +191,18 @@ _mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) (__v64qi) __S); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_F +static __inline__ __m512i __DEFAULT_FN_ATTRS_Z _mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) { - return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(), + return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(), __U, __A, __B); } #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_F -#undef __DEFAULT_FN_ATTRS_VL +#undef __DEFAULT_FN_ATTRS_Y +#undef __DEFAULT_FN_ATTRS_Z +#undef __DEFAULT_FN_ATTRS_VL128 +#undef __DEFAULT_FN_ATTRS_VL256 -#endif // __GFNIINTRIN_H +#endif /* __GFNIINTRIN_H */ diff --git a/lib/clang/7.0.0/include/htmintrin.h b/lib/clang/8.0.0/include/htmintrin.h similarity index 100% rename from lib/clang/7.0.0/include/htmintrin.h rename to lib/clang/8.0.0/include/htmintrin.h diff --git a/lib/clang/7.0.0/include/htmxlintrin.h b/lib/clang/8.0.0/include/htmxlintrin.h similarity index 100% rename from lib/clang/7.0.0/include/htmxlintrin.h rename to 
lib/clang/8.0.0/include/htmxlintrin.h diff --git a/lib/clang/7.0.0/include/ia32intrin.h b/lib/clang/8.0.0/include/ia32intrin.h similarity index 95% rename from lib/clang/7.0.0/include/ia32intrin.h rename to lib/clang/8.0.0/include/ia32intrin.h index 4928300..f8972e3 100644 --- a/lib/clang/7.0.0/include/ia32intrin.h +++ b/lib/clang/8.0.0/include/ia32intrin.h @@ -70,4 +70,9 @@ __rdtscp(unsigned int *__A) { #define _rdpmc(A) __rdpmc(A) +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_wbinvd(void) { + __builtin_ia32_wbinvd(); +} + #endif /* __IA32INTRIN_H */ diff --git a/lib/clang/7.0.0/include/immintrin.h b/lib/clang/8.0.0/include/immintrin.h similarity index 58% rename from lib/clang/7.0.0/include/immintrin.h rename to lib/clang/8.0.0/include/immintrin.h index a332879..7d0722e 100644 --- a/lib/clang/7.0.0/include/immintrin.h +++ b/lib/clang/8.0.0/include/immintrin.h @@ -68,55 +68,11 @@ #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) #include +#endif -/* The 256-bit versions of functions in f16cintrin.h. - Intel documents these as being in immintrin.h, and - they depend on typedefs from avxintrin.h. */ - -/// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector -/// containing 16-bit half-precision float values. -/// -/// \headerfile -/// -/// \code -/// __m128i _mm256_cvtps_ph(__m256 a, const int imm); -/// \endcode -/// -/// This intrinsic corresponds to the VCVTPS2PH instruction. -/// -/// \param a -/// A 256-bit vector containing 32-bit single-precision float values to be -/// converted to 16-bit half-precision float values. -/// \param imm -/// An immediate value controlling rounding using bits [2:0]: \n -/// 000: Nearest \n -/// 001: Down \n -/// 010: Up \n -/// 011: Truncate \n -/// 1XX: Use MXCSR.RC for rounding -/// \returns A 128-bit vector containing the converted 16-bit half-precision -/// float values. -#define _mm256_cvtps_ph(a, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); }) - -/// \brief Converts a 128-bit vector containing 16-bit half-precision float -/// values into a 256-bit vector of [8 x float]. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the VCVTPH2PS instruction. -/// -/// \param __a -/// A 128-bit vector containing 16-bit half-precision float values to be -/// converted to 32-bit single-precision float values. -/// \returns A vector of [8 x float] containing the converted 32-bit -/// single-precision float values. -static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) -_mm256_cvtph_ps(__m128i __a) -{ - return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); -} -#endif /* __AVX2__ */ +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__) +#include +#endif #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) #include @@ -134,6 +90,10 @@ _mm256_cvtph_ps(__m128i __a) #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) #include #endif @@ -248,7 +208,7 @@ _mm256_cvtph_ps(__m128i __a) #endif #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__) -/// \brief Returns the value of the IA32_TSC_AUX MSR (0xc0000103). +/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). 
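The immintrin.h hunks above fold the 256-bit F16C conversions back into their own sub-header and add a POPCNT include, both behind the header's standard feature gate: each sub-header is visible when the matching feature macro is defined, or unconditionally under modules / MSVC emulation. The pattern, sketched with `<popcntintrin.h>` (a known clang sub-header) as the example:

```c
/* immintrin.h's gating pattern for sub-headers. */
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif
```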
/// /// \headerfile /// @@ -322,30 +282,89 @@ _readgsbase_u64(void) static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u32(unsigned int __V) { - return __builtin_ia32_wrfsbase32(__V); + __builtin_ia32_wrfsbase32(__V); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u64(unsigned long long __V) { - return __builtin_ia32_wrfsbase64(__V); + __builtin_ia32_wrfsbase64(__V); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u32(unsigned int __V) { - return __builtin_ia32_wrgsbase32(__V); + __builtin_ia32_wrgsbase32(__V); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u64(unsigned long long __V) { - return __builtin_ia32_wrgsbase64(__V); + __builtin_ia32_wrgsbase64(__V); } #endif #endif /* __FSGSBASE__ */ +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__) + +/* The structs used below are to force the load/store to be unaligned. This + * is accomplished with the __packed__ attribute. The __may_alias__ prevents + * tbaa metadata from being generated based on the struct and the type of the + * field inside of it. + */ + +static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i16(void const * __P) { + struct __loadu_i16 { + short __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap16(((struct __loadu_i16*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i16(void * __P, short __D) { + struct __storeu_i16 { + short __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i32(void const * __P) { + struct __loadu_i32 { + int __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap32(((struct __loadu_i32*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i32(void * __P, int __D) { + struct __storeu_i32 { + int __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D); +} + +#ifdef __x86_64__ +static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i64(void const * __P) { + struct __loadu_i64 { + long long __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap64(((struct __loadu_i64*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i64(void * __P, long long __D) { + struct __storeu_i64 { + long long __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D); +} +#endif +#endif /* __MOVBE */ + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) #include #include @@ -383,4 +402,125 @@ _writegsbase_u64(unsigned long long __V) * whereas others are also available at all times. 
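The new `_loadbe_*`/`_storebe_*` helpers above rely on packed, may-alias single-member structs so the access is emitted as an unaligned load or store with no type-based aliasing assumptions, followed by a byte swap. A standalone sketch of the same trick (`load_be32` is an illustrative name):

```c
static inline int load_be32(const void *p) {
  /* __packed__ removes the alignment assumption; __may_alias__ keeps
   * TBAA metadata from being attached to the access. */
  struct u32_wrap { unsigned int v; }
      __attribute__((__packed__, __may_alias__));
  return (int)__builtin_bswap32(((const struct u32_wrap *)p)->v);
}
```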
*/ #include +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + defined(__MOVDIRI__) || defined(__MOVDIR64B__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__) +#include +#endif + +#ifdef _MSC_VER +/* Define the default attributes for these intrinsics */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#ifdef __cplusplus +extern "C" { +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Exchange HLE +\*----------------------------------------------------------------------------*/ +#if defined(__i386__) || defined(__x86_64__) +static __inline__ long __DEFAULT_FN_ATTRS +_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { + __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" + : "+r" (_Value), "+m" (*_Target) :: "memory"); + return _Value; +} +static __inline__ long __DEFAULT_FN_ATTRS +_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { + __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" + : "+r" (_Value), "+m" (*_Target) :: "memory"); + return _Value; +} +#endif +#if defined(__x86_64__) +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { + __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" + : "+r" (_Value), "+m" (*_Target) :: "memory"); + return _Value; +} +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { + __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" + : "+r" (_Value), "+m" (*_Target) :: "memory"); + return _Value; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Compare Exchange HLE +\*----------------------------------------------------------------------------*/ +#if defined(__i386__) || defined(__x86_64__) +static __inline__ long __DEFAULT_FN_ATTRS +_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, + long _Exchange, long _Comparand) { + __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" + : "+a" (_Comparand), "+m" (*_Destination) + : "r" (_Exchange) : "memory"); + return _Comparand; +} +static __inline__ long __DEFAULT_FN_ATTRS +_InterlockedCompareExchange_HLERelease(long volatile *_Destination, + long _Exchange, long _Comparand) { + __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" + : "+a" (_Comparand), "+m" (*_Destination) + : "r" (_Exchange) : "memory"); + return _Comparand; +} +#endif +#if defined(__x86_64__) +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand) { + __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" + : "+a" (_Comparand), "+m" (*_Destination) + : "r" (_Exchange) : "memory"); + return 
_Comparand; +} +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand) { + __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" + : "+a" (_Comparand), "+m" (*_Destination) + : "r" (_Exchange) : "memory"); + return _Comparand; +} +#endif +#ifdef __cplusplus +} +#endif + +#undef __DEFAULT_FN_ATTRS + +#endif /* _MSC_VER */ + #endif /* __IMMINTRIN_H */ diff --git a/lib/clang/7.0.0/include/intrin.h b/lib/clang/8.0.0/include/intrin.h similarity index 90% rename from lib/clang/7.0.0/include/intrin.h rename to lib/clang/8.0.0/include/intrin.h index b30aa21..6f753ba 100644 --- a/lib/clang/7.0.0/include/intrin.h +++ b/lib/clang/8.0.0/include/intrin.h @@ -38,7 +38,7 @@ #include #endif -#if defined(_M_ARM64) +#if defined(__aarch64__) #include #endif @@ -83,6 +83,7 @@ void __incfsdword(unsigned long); void __incfsword(unsigned long); unsigned long __indword(unsigned short); void __indwordstring(unsigned short, unsigned long *, unsigned long); +void __int2c(void); void __invlpg(void *); unsigned short __inword(unsigned short); void __inwordstring(unsigned short, unsigned short *, unsigned long); @@ -140,6 +141,7 @@ void __svm_stgi(void); void __svm_vmload(size_t); void __svm_vmrun(size_t); void __svm_vmsave(size_t); +void __ud2(void); unsigned __int64 __ull_rshift(unsigned __int64, int); void __vmx_off(void); void __vmx_vmptrst(unsigned __int64 *); @@ -161,25 +163,15 @@ static __inline__ unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); static __inline__ unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); -static __inline__ unsigned char _bittest(long const *, long); -static __inline__ unsigned char _bittestandcomplement(long *, long); -static __inline__ unsigned char _bittestandreset(long *, long); -static __inline__ unsigned char _bittestandset(long *, long); void __cdecl _disable(void); void __cdecl _enable(void); long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); unsigned char _interlockedbittestandreset(long volatile *, long); unsigned char _interlockedbittestandset(long volatile *, long); -long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long); -long _InterlockedCompareExchange_HLERelease(long volatile *, long, long); -__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64, - __int64); -__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, - __int64); void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, void *); void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, @@ -256,24 +248,15 @@ void __writegsbyte(unsigned long, unsigned char); void __writegsdword(unsigned long, unsigned long); void __writegsqword(unsigned long, unsigned __int64); void __writegsword(unsigned long, unsigned short); -static __inline__ -unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); -static __inline__ -unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); -static __inline__ unsigned char _bittest64(__int64 const *, __int64); -static __inline__ unsigned char _bittestandcomplement64(__int64 *, __int64); -static __inline__ unsigned char _bittestandreset64(__int64 *, __int64); -static __inline__ unsigned char _bittestandset64(__int64 *, __int64); long _InterlockedAnd_np(long volatile *_Value, long _Mask); short _InterlockedAnd16_np(short volatile *_Value, short _Mask); __int64 
_InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedAnd8_np(char volatile *_Value, char _Mask); unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); -static __inline__ unsigned char _interlockedbittestandset64(__int64 volatile *, __int64); long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, long _Comparand); @@ -287,10 +270,6 @@ unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, __int64 *_ComparandResult); short _InterlockedCompareExchange16_np(short volatile *_Destination, short _Exchange, short _Comparand); -__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64, - __int64); -__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, - __int64); __int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, @@ -320,7 +299,12 @@ unsigned __int64 _umul128(unsigned __int64, #endif /* __x86_64__ */ -#if defined(__x86_64__) || defined(__arm__) +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) + +static __inline__ +unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); +static __inline__ +unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); static __inline__ __int64 _InterlockedDecrement64(__int64 volatile *_Addend); @@ -341,78 +325,6 @@ __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask); #endif -/*----------------------------------------------------------------------------*\ -|* Bit Counting and Testing -\*----------------------------------------------------------------------------*/ -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittest(long const *_BitBase, long _BitPos) { - return (*_BitBase >> _BitPos) & 1; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandcomplement(long *_BitBase, long _BitPos) { - unsigned char _Res = (*_BitBase >> _BitPos) & 1; - *_BitBase = *_BitBase ^ (1 << _BitPos); - return _Res; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandreset(long *_BitBase, long _BitPos) { - unsigned char _Res = (*_BitBase >> _BitPos) & 1; - *_BitBase = *_BitBase & ~(1 << _BitPos); - return _Res; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandset(long *_BitBase, long _BitPos) { - unsigned char _Res = (*_BitBase >> _BitPos) & 1; - *_BitBase = *_BitBase | (1 << _BitPos); - return _Res; -} -#if defined(__arm__) || defined(__aarch64__) -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_interlockedbittestandset_acq(long volatile *_BitBase, long _BitPos) { - long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_ACQUIRE); - return (_PrevVal >> _BitPos) & 1; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_interlockedbittestandset_nf(long volatile *_BitBase, long _BitPos) { - long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_RELAXED); - return (_PrevVal >> _BitPos) & 1; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_interlockedbittestandset_rel(long volatile *_BitBase, long _BitPos) { - long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_RELEASE); - return (_PrevVal >> _BitPos) & 1; -} -#endif -#ifdef __x86_64__ -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittest64(__int64 const *_BitBase, __int64 _BitPos) { - return (*_BitBase >> _BitPos) & 1; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS 
-_bittestandcomplement64(__int64 *_BitBase, __int64 _BitPos) { - unsigned char _Res = (*_BitBase >> _BitPos) & 1; - *_BitBase = *_BitBase ^ (1ll << _BitPos); - return _Res; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandreset64(__int64 *_BitBase, __int64 _BitPos) { - unsigned char _Res = (*_BitBase >> _BitPos) & 1; - *_BitBase = *_BitBase & ~(1ll << _BitPos); - return _Res; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandset64(__int64 *_BitBase, __int64 _BitPos) { - unsigned char _Res = (*_BitBase >> _BitPos) & 1; - *_BitBase = *_BitBase | (1ll << _BitPos); - return _Res; -} -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_interlockedbittestandset64(__int64 volatile *_BitBase, __int64 _BitPos) { - long long _PrevVal = - __atomic_fetch_or(_BitBase, 1ll << _BitPos, __ATOMIC_SEQ_CST); - return (_PrevVal >> _BitPos) & 1; -} -#endif /*----------------------------------------------------------------------------*\ |* Interlocked Exchange Add \*----------------------------------------------------------------------------*/ @@ -602,6 +514,23 @@ _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask) { } #endif /*----------------------------------------------------------------------------*\ +|* Bit Counting and Testing +\*----------------------------------------------------------------------------*/ +#if defined(__arm__) || defined(__aarch64__) +unsigned char _interlockedbittestandset_acq(long volatile *_BitBase, + long _BitPos); +unsigned char _interlockedbittestandset_nf(long volatile *_BitBase, + long _BitPos); +unsigned char _interlockedbittestandset_rel(long volatile *_BitBase, + long _BitPos); +unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase, + long _BitPos); +unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase, + long _BitPos); +unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase, + long _BitPos); +#endif +/*----------------------------------------------------------------------------*\ |* Interlocked Or \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) @@ -868,33 +797,40 @@ _InterlockedCompareExchange64_rel(__int64 volatile *_Destination, #if defined(__i386__) || defined(__x86_64__) static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) { - __asm__("rep movsb" : : "D"(__dst), "S"(__src), "c"(__n)); + __asm__ __volatile__("rep movsb" : "+D"(__dst), "+S"(__src), "+c"(__n) + : : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) { - __asm__("rep movsl" : : "D"(__dst), "S"(__src), "c"(__n)); + __asm__ __volatile__("rep movsl" : "+D"(__dst), "+S"(__src), "+c"(__n) + : : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) { - __asm__("rep movsw" : : "D"(__dst), "S"(__src), "c"(__n)); + __asm__ __volatile__("rep movsw" : "+D"(__dst), "+S"(__src), "+c"(__n) + : : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { - __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)); + __asm__ __volatile__("rep stosl" : "+D"(__dst), "+c"(__n) : "a"(__x) + : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __stosw(unsigned short *__dst, unsigned short __x, size_t __n) { - __asm__("rep stosw" : : "D"(__dst), "a"(__x), "c"(__n)); + __asm__ __volatile__("rep 
stosw" : "+D"(__dst), "+c"(__n) : "a"(__x) + : "memory"); } #endif #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS __movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) { - __asm__("rep movsq" : : "D"(__dst), "S"(__src), "c"(__n)); + __asm__ __volatile__("rep movsq" : "+D"(__dst), "+S"(__src), "+c"(__n) + : : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { - __asm__("rep stosq" : : "D"(__dst), "a"(__x), "c"(__n)); + __asm__ __volatile__("rep stosq" : "+D"(__dst), "+c"(__n) : "a"(__x) + : "memory"); } #endif @@ -928,6 +864,14 @@ __nop(void) { } #endif +/*----------------------------------------------------------------------------*\ +|* MS AArch64 specific +\*----------------------------------------------------------------------------*/ +#if defined(__aarch64__) +unsigned __int64 __getReg(int); +long _InterlockedAdd(long volatile *Addend, long Value); +#endif + /*----------------------------------------------------------------------------*\ |* Privileged intrinsics \*----------------------------------------------------------------------------*/ diff --git a/lib/clang/7.0.0/include/inttypes.h b/lib/clang/8.0.0/include/inttypes.h similarity index 100% rename from lib/clang/7.0.0/include/inttypes.h rename to lib/clang/8.0.0/include/inttypes.h diff --git a/lib/clang/8.0.0/include/invpcidintrin.h b/lib/clang/8.0.0/include/invpcidintrin.h new file mode 100644 index 0000000..c30a19f --- /dev/null +++ b/lib/clang/8.0.0/include/invpcidintrin.h @@ -0,0 +1,37 @@ +/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __INVPCIDINTRIN_H +#define __INVPCIDINTRIN_H + +static __inline__ void + __attribute__((__always_inline__, __nodebug__, __target__("invpcid"))) +_invpcid(unsigned int __type, void *__descriptor) { + __builtin_ia32_invpcid(__type, __descriptor); +} + +#endif /* __INVPCIDINTRIN_H */ diff --git a/lib/clang/7.0.0/include/iso646.h b/lib/clang/8.0.0/include/iso646.h similarity index 100% rename from lib/clang/7.0.0/include/iso646.h rename to lib/clang/8.0.0/include/iso646.h diff --git a/lib/clang/7.0.0/include/limits.h b/lib/clang/8.0.0/include/limits.h similarity index 100% rename from lib/clang/7.0.0/include/limits.h rename to lib/clang/8.0.0/include/limits.h diff --git a/lib/clang/7.0.0/include/lwpintrin.h b/lib/clang/8.0.0/include/lwpintrin.h similarity index 92% rename from lib/clang/7.0.0/include/lwpintrin.h rename to lib/clang/8.0.0/include/lwpintrin.h index c95fdd9..0b28d73 100644 --- a/lib/clang/7.0.0/include/lwpintrin.h +++ b/lib/clang/8.0.0/include/lwpintrin.h @@ -31,7 +31,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp"))) -/// \brief Parses the LWPCB at the specified address and enables +/// Parses the LWPCB at the specified address and enables /// profiling if valid. /// /// \headerfile @@ -48,7 +48,7 @@ __llwpcb (void *__addr) __builtin_ia32_llwpcb(__addr); } -/// \brief Flushes the LWP state to memory and returns the address of the LWPCB. +/// Flushes the LWP state to memory and returns the address of the LWPCB. /// /// \headerfile /// @@ -58,12 +58,12 @@ __llwpcb (void *__addr) /// Address to the current Lightweight Profiling Control Block (LWPCB). /// If LWP is not currently enabled, returns NULL. static __inline__ void* __DEFAULT_FN_ATTRS -__slwpcb () +__slwpcb (void) { return __builtin_ia32_slwpcb(); } -/// \brief Inserts programmed event record into the LWP event ring buffer +/// Inserts programmed event record into the LWP event ring buffer /// and advances the ring buffer pointer. /// /// \headerfile @@ -84,7 +84,7 @@ __slwpcb () (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) -/// \brief Decrements the LWP programmed value sample event counter. If the result is +/// Decrements the LWP programmed value sample event counter. If the result is /// negative, inserts an event record into the LWP event ring buffer in memory /// and advances the ring buffer pointer. /// @@ -104,7 +104,7 @@ __slwpcb () #ifdef __x86_64__ -/// \brief Inserts programmed event record into the LWP event ring buffer +/// Inserts programmed event record into the LWP event ring buffer /// and advances the ring buffer pointer. /// /// \headerfile @@ -125,7 +125,7 @@ __slwpcb () (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) -/// \brief Decrements the LWP programmed value sample event counter. If the result is +/// Decrements the LWP programmed value sample event counter. If the result is /// negative, inserts an event record into the LWP event ring buffer in memory /// and advances the ring buffer pointer. 
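The small `__slwpcb ()` to `__slwpcb (void)` change above matters in C: an empty parameter list declares a function with unspecified arguments, not one that takes none. Illustrative declarations of the distinction:

```c
void *get_lwpcb_any();  /* pre-C23: arguments left unspecified        */
void *get_lwpcb(void);  /* a true prototype: takes no arguments at all */
```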
/// diff --git a/lib/clang/7.0.0/include/lzcntintrin.h b/lib/clang/8.0.0/include/lzcntintrin.h similarity index 86% rename from lib/clang/7.0.0/include/lzcntintrin.h rename to lib/clang/8.0.0/include/lzcntintrin.h index 3d2769d..f2674d2 100644 --- a/lib/clang/7.0.0/include/lzcntintrin.h +++ b/lib/clang/8.0.0/include/lzcntintrin.h @@ -31,7 +31,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) -/// \brief Counts the number of leading zero bits in the operand. +/// Counts the number of leading zero bits in the operand. /// /// \headerfile /// @@ -44,10 +44,10 @@ static __inline__ unsigned short __DEFAULT_FN_ATTRS __lzcnt16(unsigned short __X) { - return __X ? __builtin_clzs(__X) : 16; + return __builtin_ia32_lzcnt_u16(__X); } -/// \brief Counts the number of leading zero bits in the operand. +/// Counts the number of leading zero bits in the operand. /// /// \headerfile /// @@ -57,13 +57,14 @@ __lzcnt16(unsigned short __X) /// An unsigned 32-bit integer whose leading zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of leading zero /// bits in the operand. +/// \see _lzcnt_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __lzcnt32(unsigned int __X) { - return __X ? __builtin_clz(__X) : 32; + return __builtin_ia32_lzcnt_u32(__X); } -/// \brief Counts the number of leading zero bits in the operand. +/// Counts the number of leading zero bits in the operand. /// /// \headerfile /// @@ -73,14 +74,15 @@ __lzcnt32(unsigned int __X) /// An unsigned 32-bit integer whose leading zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of leading zero /// bits in the operand. +/// \see __lzcnt32 static __inline__ unsigned int __DEFAULT_FN_ATTRS _lzcnt_u32(unsigned int __X) { - return __X ? __builtin_clz(__X) : 32; + return __builtin_ia32_lzcnt_u32(__X); } #ifdef __x86_64__ -/// \brief Counts the number of leading zero bits in the operand. +/// Counts the number of leading zero bits in the operand. /// /// \headerfile /// @@ -90,13 +92,14 @@ _lzcnt_u32(unsigned int __X) /// An unsigned 64-bit integer whose leading zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of leading zero /// bits in the operand. +/// \see _lzcnt_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __lzcnt64(unsigned long long __X) { - return __X ? __builtin_clzll(__X) : 64; + return __builtin_ia32_lzcnt_u64(__X); } -/// \brief Counts the number of leading zero bits in the operand. +/// Counts the number of leading zero bits in the operand. /// /// \headerfile /// @@ -106,10 +109,11 @@ __lzcnt64(unsigned long long __X) /// An unsigned 64-bit integer whose leading zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of leading zero /// bits in the operand. +/// \see __lzcnt64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS _lzcnt_u64(unsigned long long __X) { - return __X ? 
__builtin_clzll(__X) : 64; + return __builtin_ia32_lzcnt_u64(__X); } #endif diff --git a/lib/clang/7.0.0/include/mm3dnow.h b/lib/clang/8.0.0/include/mm3dnow.h similarity index 96% rename from lib/clang/7.0.0/include/mm3dnow.h rename to lib/clang/8.0.0/include/mm3dnow.h index 294866c..b028875 100644 --- a/lib/clang/7.0.0/include/mm3dnow.h +++ b/lib/clang/8.0.0/include/mm3dnow.h @@ -30,9 +30,9 @@ typedef float __v2sf __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64))) -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) _m_femms(void) { __builtin_ia32_femms(); } @@ -134,7 +134,7 @@ _m_pmulhrw(__m64 __m1, __m64 __m2) { /* Handle the 3dnowa instructions here. */ #undef __DEFAULT_FN_ATTRS -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64))) static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2iw(__m64 __m) { diff --git a/lib/clang/7.0.0/include/mm_malloc.h b/lib/clang/8.0.0/include/mm_malloc.h similarity index 100% rename from lib/clang/7.0.0/include/mm_malloc.h rename to lib/clang/8.0.0/include/mm_malloc.h diff --git a/lib/clang/7.0.0/include/mmintrin.h b/lib/clang/8.0.0/include/mmintrin.h similarity index 90% rename from lib/clang/7.0.0/include/mmintrin.h rename to lib/clang/8.0.0/include/mmintrin.h index 102c08f..a735399 100644 --- a/lib/clang/7.0.0/include/mmintrin.h +++ b/lib/clang/8.0.0/include/mmintrin.h @@ -32,22 +32,22 @@ typedef short __v4hi __attribute__((__vector_size__(8))); typedef char __v8qi __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) -/// \brief Clears the MMX state by setting the state of the x87 stack registers +/// Clears the MMX state by setting the state of the x87 stack registers /// to empty. /// /// \headerfile /// /// This intrinsic corresponds to the EMMS instruction. /// -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) _mm_empty(void) { __builtin_ia32_emms(); } -/// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the +/// Constructs a 64-bit integer vector, setting the lower 32 bits to the /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. /// /// \headerfile @@ -64,7 +64,7 @@ _mm_cvtsi32_si64(int __i) return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); } -/// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit +/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit /// signed integer. /// /// \headerfile @@ -81,7 +81,7 @@ _mm_cvtsi64_si32(__m64 __m) return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); } -/// \brief Casts a 64-bit signed integer value into a 64-bit integer vector. +/// Casts a 64-bit signed integer value into a 64-bit integer vector. 
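The lzcntintrin.h hunks above replace the `__X ? __builtin_clz(__X) : 32` idiom with dedicated lzcnt builtins: `__builtin_clz` is undefined for a zero input, so the old code had to branch, whereas the LZCNT instruction itself is defined to return the operand width at zero. The old behavior as a portable sketch (illustrative name):

```c
static inline unsigned lzcnt_u32_portable(unsigned x) {
  /* __builtin_clz(0) is undefined, hence the explicit zero check the
   * header needed before it could emit LZCNT directly. */
  return x ? (unsigned)__builtin_clz(x) : 32u;
}
```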
/// /// \headerfile /// @@ -97,7 +97,7 @@ _mm_cvtsi64_m64(long long __i) return (__m64)__i; } -/// \brief Casts a 64-bit integer vector into a 64-bit signed integer value. +/// Casts a 64-bit integer vector into a 64-bit signed integer value. /// /// \headerfile /// @@ -113,7 +113,7 @@ _mm_cvtm64_si64(__m64 __m) return (long long)__m; } -/// \brief Converts 16-bit signed integers from both 64-bit integer vector +/// Converts 16-bit signed integers from both 64-bit integer vector /// parameters of [4 x i16] into 8-bit signed integer values, and constructs /// a 64-bit integer vector of [8 x i8] as the result. Positive values /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 @@ -143,7 +143,7 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Converts 32-bit signed integers from both 64-bit integer vector +/// Converts 32-bit signed integers from both 64-bit integer vector /// parameters of [2 x i32] into 16-bit signed integer values, and constructs /// a 64-bit integer vector of [4 x i16] as the result. Positive values /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than @@ -173,7 +173,7 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); } -/// \brief Converts 16-bit signed integers from both 64-bit integer vector +/// Converts 16-bit signed integers from both 64-bit integer vector /// parameters of [4 x i16] into 8-bit unsigned integer values, and /// constructs a 64-bit integer vector of [8 x i8] as the result. Values /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated @@ -203,7 +203,7 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] +/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] /// and interleaves them into a 64-bit integer vector of [8 x i8]. /// /// \headerfile @@ -230,7 +230,7 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of +/// Unpacks the upper 32 bits from two 64-bit integer vectors of /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. /// /// \headerfile @@ -253,7 +253,7 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of +/// Unpacks the upper 32 bits from two 64-bit integer vectors of /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. /// /// \headerfile @@ -274,7 +274,7 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); } -/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] +/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] /// and interleaves them into a 64-bit integer vector of [8 x i8]. /// /// \headerfile @@ -301,7 +301,7 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of +/// Unpacks the lower 32 bits from two 64-bit integer vectors of /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 
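The unpack documentation above can be stated compactly in scalar form; for `_mm_unpackhi_pi8`, the upper four bytes of the two sources alternate in the result. A reference sketch (the function name is illustrative):

```c
static void unpackhi_pi8_ref(const signed char m1[8], const signed char m2[8],
                             signed char r[8]) {
  for (int i = 0; i < 4; ++i) {
    r[2 * i]     = m1[4 + i]; /* upper half of the first source  */
    r[2 * i + 1] = m2[4 + i]; /* upper half of the second source */
  }
}
```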
/// /// \headerfile @@ -324,7 +324,7 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of +/// Unpacks the lower 32 bits from two 64-bit integer vectors of /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. /// /// \headerfile @@ -345,7 +345,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); } -/// \brief Adds each 8-bit integer element of the first 64-bit integer vector +/// Adds each 8-bit integer element of the first 64-bit integer vector /// of [8 x i8] to the corresponding 8-bit integer element of the second /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are /// packed into a 64-bit integer vector of [8 x i8]. @@ -366,7 +366,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Adds each 16-bit integer element of the first 64-bit integer vector +/// Adds each 16-bit integer element of the first 64-bit integer vector /// of [4 x i16] to the corresponding 16-bit integer element of the second /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are /// packed into a 64-bit integer vector of [4 x i16]. @@ -387,7 +387,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Adds each 32-bit integer element of the first 64-bit integer vector +/// Adds each 32-bit integer element of the first 64-bit integer vector /// of [2 x i32] to the corresponding 32-bit integer element of the second /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are /// packed into a 64-bit integer vector of [2 x i32]. @@ -408,7 +408,7 @@ _mm_add_pi32(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); } -/// \brief Adds each 8-bit signed integer element of the first 64-bit integer +/// Adds each 8-bit signed integer element of the first 64-bit integer /// vector of [8 x i8] to the corresponding 8-bit signed integer element of /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to @@ -430,7 +430,7 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Adds each 16-bit signed integer element of the first 64-bit integer +/// Adds each 16-bit signed integer element of the first 64-bit integer /// vector of [4 x i16] to the corresponding 16-bit signed integer element of /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than /// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are @@ -453,7 +453,7 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer +/// Adds each 8-bit unsigned integer element of the first 64-bit integer /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are /// saturated to 0xFF. 
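The saturating adds described above (`_mm_adds_pi8` clamping to 0x7F/0x80, `_mm_adds_pu8` clamping to 0xFF) reduce to simple scalar clamps applied per 8-bit lane. A reference sketch with illustrative names:

```c
static signed char adds_i8(signed char a, signed char b) {
  int s = a + b; /* widen so the intermediate sum cannot overflow */
  return (signed char)(s > 127 ? 127 : s < -128 ? -128 : s);
}

static unsigned char adds_u8(unsigned char a, unsigned char b) {
  unsigned s = (unsigned)a + b;
  return (unsigned char)(s > 255u ? 255u : s);
}
```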
The results are packed into a 64-bit integer vector of @@ -475,7 +475,7 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer +/// Adds each 16-bit unsigned integer element of the first 64-bit integer /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element /// of the second 64-bit integer vector of [4 x i16]. Sums greater than /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit @@ -497,7 +497,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Subtracts each 8-bit integer element of the second 64-bit integer +/// Subtracts each 8-bit integer element of the second 64-bit integer /// vector of [8 x i8] from the corresponding 8-bit integer element of the /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results /// are packed into a 64-bit integer vector of [8 x i8]. @@ -518,7 +518,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Subtracts each 16-bit integer element of the second 64-bit integer +/// Subtracts each 16-bit integer element of the second 64-bit integer /// vector of [4 x i16] from the corresponding 16-bit integer element of the /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the /// results are packed into a 64-bit integer vector of [4 x i16]. @@ -539,7 +539,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Subtracts each 32-bit integer element of the second 64-bit integer +/// Subtracts each 32-bit integer element of the second 64-bit integer /// vector of [2 x i32] from the corresponding 32-bit integer element of the /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the /// results are packed into a 64-bit integer vector of [2 x i32]. @@ -560,7 +560,7 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); } -/// \brief Subtracts each 8-bit signed integer element of the second 64-bit +/// Subtracts each 8-bit signed integer element of the second 64-bit /// integer vector of [8 x i8] from the corresponding 8-bit signed integer /// element of the first 64-bit integer vector of [8 x i8]. Positive results /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 @@ -583,7 +583,7 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Subtracts each 16-bit signed integer element of the second 64-bit +/// Subtracts each 16-bit signed integer element of the second 64-bit /// integer vector of [4 x i16] from the corresponding 16-bit signed integer /// element of the first 64-bit integer vector of [4 x i16]. Positive results /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than @@ -606,7 +606,7 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit +/// Subtracts each 8-bit unsigned integer element of the second 64-bit /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer /// element of the first 64-bit integer vector of [8 x i8]. 
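Subtraction mirrors addition: the `_mm_sub_*` forms truncate while `_mm_subs_*` saturate, and the unsigned variants clamp at zero. A sketch of the unsigned clamp (not from the diff):

```c
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 a = _mm_set1_pi8(10), b = _mm_set1_pi8(25);
    /* psubusb: 10 - 25 underflows, so every unsigned byte clamps at 0 */
    long long r = _mm_cvtm64_si64(_mm_subs_pu8(a, b));
    _mm_empty();
    printf("%016llx\n", (unsigned long long)r);  /* 0000000000000000 */
    return 0;
}
```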
/// @@ -630,7 +630,7 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit +/// Subtracts each 16-bit unsigned integer element of the second 64-bit /// integer vector of [4 x i16] from the corresponding 16-bit unsigned /// integer element of the first 64-bit integer vector of [4 x i16]. /// @@ -654,7 +654,7 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Multiplies each 16-bit signed integer element of the first 64-bit +/// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16] and get four /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. @@ -681,7 +681,7 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Multiplies each 16-bit signed integer element of the first 64-bit +/// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. @@ -702,7 +702,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Multiplies each 16-bit signed integer element of the first 64-bit +/// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. @@ -723,7 +723,7 @@ _mm_mullo_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Left-shifts each 16-bit signed integer element of the first +/// Left-shifts each 16-bit signed integer element of the first /// parameter, which is a 64-bit integer vector of [4 x i16], by the number /// of bits specified by the second parameter, which is a 64-bit integer. The /// lower 16 bits of the results are packed into a 64-bit integer vector of @@ -746,7 +746,7 @@ _mm_sll_pi16(__m64 __m, __m64 __count) return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); } -/// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer +/// Left-shifts each 16-bit signed integer element of a 64-bit integer /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. /// The lower 16 bits of the results are packed into a 64-bit integer vector /// of [4 x i16]. @@ -768,7 +768,7 @@ _mm_slli_pi16(__m64 __m, int __count) return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); } -/// \brief Left-shifts each 32-bit signed integer element of the first +/// Left-shifts each 32-bit signed integer element of the first /// parameter, which is a 64-bit integer vector of [2 x i32], by the number /// of bits specified by the second parameter, which is a 64-bit integer. 
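`_mm_madd_pi16` (pmaddwd) is the one widening operation in this group: four 16-by-16-bit products are reduced pairwise into two 32-bit sums, the classic building block of fixed-point dot products. A sketch (not from the diff):

```c
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 a = _mm_set_pi16(4, 3, 2, 1);   /* lanes low..high: 1, 2, 3, 4 */
    __m64 b = _mm_set_pi16(8, 7, 6, 5);   /* lanes low..high: 5, 6, 7, 8 */
    __m64 r = _mm_madd_pi16(a, b);        /* [1*5 + 2*6, 3*7 + 4*8] = [17, 53] */
    long long bits = _mm_cvtm64_si64(r);
    _mm_empty();
    printf("lo=%d hi=%d\n", (int)bits, (int)(bits >> 32)); /* 17 53 */
    return 0;
}
```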
The /// lower 32 bits of the results are packed into a 64-bit integer vector of @@ -791,7 +791,7 @@ _mm_sll_pi32(__m64 __m, __m64 __count) return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); } -/// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer +/// Left-shifts each 32-bit signed integer element of a 64-bit integer /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. /// The lower 32 bits of the results are packed into a 64-bit integer vector /// of [2 x i32]. @@ -813,7 +813,7 @@ _mm_slli_pi32(__m64 __m, int __count) return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); } -/// \brief Left-shifts the first 64-bit integer parameter by the number of bits +/// Left-shifts the first 64-bit integer parameter by the number of bits /// specified by the second 64-bit integer parameter. The lower 64 bits of /// result are returned. /// @@ -833,7 +833,7 @@ _mm_sll_si64(__m64 __m, __m64 __count) return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); } -/// \brief Left-shifts the first parameter, which is a 64-bit integer, by the +/// Left-shifts the first parameter, which is a 64-bit integer, by the /// number of bits specified by the second parameter, which is a 32-bit /// integer. The lower 64 bits of result are returned. /// @@ -853,7 +853,7 @@ _mm_slli_si64(__m64 __m, int __count) return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); } -/// \brief Right-shifts each 16-bit integer element of the first parameter, +/// Right-shifts each 16-bit integer element of the first parameter, /// which is a 64-bit integer vector of [4 x i16], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// @@ -877,7 +877,7 @@ _mm_sra_pi16(__m64 __m, __m64 __count) return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); } -/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector +/// Right-shifts each 16-bit integer element of a 64-bit integer vector /// of [4 x i16] by the number of bits specified by a 32-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each @@ -900,7 +900,7 @@ _mm_srai_pi16(__m64 __m, int __count) return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); } -/// \brief Right-shifts each 32-bit integer element of the first parameter, +/// Right-shifts each 32-bit integer element of the first parameter, /// which is a 64-bit integer vector of [2 x i32], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// @@ -924,7 +924,7 @@ _mm_sra_pi32(__m64 __m, __m64 __count) return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); } -/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector +/// Right-shifts each 32-bit integer element of a 64-bit integer vector /// of [2 x i32] by the number of bits specified by a 32-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each @@ -947,7 +947,7 @@ _mm_srai_pi32(__m64 __m, int __count) return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); } -/// \brief Right-shifts each 16-bit integer element of the first parameter, +/// Right-shifts each 16-bit integer element of the first parameter, /// which is a 64-bit integer vector of [4 x i16], by the number of bits /// specified by the second parameter, which is a 64-bit integer. 
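These shift hunks cover `psll*` and `psra*`; the arithmetic right shifts fill with the sign bit, while the logical right shifts in the next hunk fill with zeros, a difference that only shows on negative lanes. A sketch (not from the diff):

```c
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 v = _mm_set1_pi16(-32768);  /* 0x8000 in every lane */
    long long a = _mm_cvtm64_si64(_mm_srai_pi16(v, 4)); /* sign-fill: 0xf800 per lane */
    long long l = _mm_cvtm64_si64(_mm_srli_pi16(v, 4)); /* zero-fill: 0x0800 per lane */
    _mm_empty();
    printf("sra=%016llx srl=%016llx\n",
           (unsigned long long)a, (unsigned long long)l);
    return 0;
}
```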
/// @@ -970,7 +970,7 @@ _mm_srl_pi16(__m64 __m, __m64 __count) return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); } -/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector +/// Right-shifts each 16-bit integer element of a 64-bit integer vector /// of [4 x i16] by the number of bits specified by a 32-bit integer. /// /// High-order bits are cleared. The 16-bit results are packed into a 64-bit @@ -992,7 +992,7 @@ _mm_srli_pi16(__m64 __m, int __count) return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); } -/// \brief Right-shifts each 32-bit integer element of the first parameter, +/// Right-shifts each 32-bit integer element of the first parameter, /// which is a 64-bit integer vector of [2 x i32], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// @@ -1015,7 +1015,7 @@ _mm_srl_pi32(__m64 __m, __m64 __count) return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); } -/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector +/// Right-shifts each 32-bit integer element of a 64-bit integer vector /// of [2 x i32] by the number of bits specified by a 32-bit integer. /// /// High-order bits are cleared. The 32-bit results are packed into a 64-bit @@ -1037,7 +1037,7 @@ _mm_srli_pi32(__m64 __m, int __count) return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); } -/// \brief Right-shifts the first 64-bit integer parameter by the number of bits +/// Right-shifts the first 64-bit integer parameter by the number of bits /// specified by the second 64-bit integer parameter. /// /// High-order bits are cleared. @@ -1057,7 +1057,7 @@ _mm_srl_si64(__m64 __m, __m64 __count) return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); } -/// \brief Right-shifts the first parameter, which is a 64-bit integer, by the +/// Right-shifts the first parameter, which is a 64-bit integer, by the /// number of bits specified by the second parameter, which is a 32-bit /// integer. /// @@ -1078,7 +1078,7 @@ _mm_srli_si64(__m64 __m, int __count) return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); } -/// \brief Performs a bitwise AND of two 64-bit integer vectors. +/// Performs a bitwise AND of two 64-bit integer vectors. /// /// \headerfile /// @@ -1096,7 +1096,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2) return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); } -/// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then +/// Performs a bitwise NOT of the first 64-bit integer vector, and then /// performs a bitwise AND of the intermediate result and the second 64-bit /// integer vector. /// @@ -1117,7 +1117,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2) return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); } -/// \brief Performs a bitwise OR of two 64-bit integer vectors. +/// Performs a bitwise OR of two 64-bit integer vectors. /// /// \headerfile /// @@ -1135,7 +1135,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2) return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); } -/// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors. +/// Performs a bitwise exclusive OR of two 64-bit integer vectors. 
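`_mm_andnot_si64` inverts its first operand before the AND, which makes it a one-instruction "clear these bits" primitive. A sketch (not from the diff):

```c
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 mask = _mm_set1_pi8(0x0f);
    __m64 data = _mm_set1_pi8(0x5a);
    /* pandn computes (~mask) & data: keep only the bits outside the mask */
    long long r = _mm_cvtm64_si64(_mm_andnot_si64(mask, data));
    _mm_empty();
    printf("%016llx\n", (unsigned long long)r);  /* 0x50 in every byte */
    return 0;
}
```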
/// /// \headerfile /// @@ -1153,7 +1153,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2) return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); } -/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of +/// Compares the 8-bit integer elements of two 64-bit integer vectors of /// [8 x i8] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// @@ -1175,7 +1175,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of +/// Compares the 16-bit integer elements of two 64-bit integer vectors of /// [4 x i16] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// @@ -1197,7 +1197,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of +/// Compares the 32-bit integer elements of two 64-bit integer vectors of /// [2 x i32] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// @@ -1219,7 +1219,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); } -/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of +/// Compares the 8-bit integer elements of two 64-bit integer vectors of /// [8 x i8] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. /// @@ -1241,7 +1241,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); } -/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of +/// Compares the 16-bit integer elements of two 64-bit integer vectors of /// [4 x i16] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. /// @@ -1263,7 +1263,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); } -/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of +/// Compares the 32-bit integer elements of two 64-bit integer vectors of /// [2 x i32] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. /// @@ -1285,7 +1285,7 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); } -/// \brief Constructs a 64-bit integer vector initialized to zero. +/// Constructs a 64-bit integer vector initialized to zero. /// /// \headerfile /// @@ -1295,10 +1295,10 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void) { - return (__m64){ 0LL }; + return __extension__ (__m64){ 0LL }; } -/// \brief Constructs a 64-bit integer vector initialized with the specified +/// Constructs a 64-bit integer vector initialized with the specified /// 32-bit integer values. /// /// \headerfile @@ -1319,7 +1319,7 @@ _mm_set_pi32(int __i1, int __i0) return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); } -/// \brief Constructs a 64-bit integer vector initialized with the specified +/// Constructs a 64-bit integer vector initialized with the specified /// 16-bit integer values. 
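The compares return per-lane all-ones or all-zeros masks rather than booleans, so they compose directly with the bitwise operations above for branchless selection. A sketch (not from the diff):

```c
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 a = _mm_set_pi16(4, 3, 2, 1);  /* lanes low..high: 1, 2, 3, 4 */
    __m64 b = _mm_set_pi16(1, 5, 2, 0);  /* lanes low..high: 0, 2, 5, 1 */
    /* per lane: 0xffff where a > b, else 0 */
    long long m = _mm_cvtm64_si64(_mm_cmpgt_pi16(a, b));
    _mm_empty();
    printf("%016llx\n", (unsigned long long)m);  /* ffff00000000ffff */
    return 0;
}
```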
/// /// \headerfile @@ -1342,7 +1342,7 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); } -/// \brief Constructs a 64-bit integer vector initialized with the specified +/// Constructs a 64-bit integer vector initialized with the specified /// 8-bit integer values. /// /// \headerfile @@ -1375,7 +1375,7 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, __b4, __b5, __b6, __b7); } -/// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the +/// Constructs a 64-bit integer vector of [2 x i32], with each of the /// 32-bit integer vector elements set to the specified 32-bit integer /// value. /// @@ -1394,7 +1394,7 @@ _mm_set1_pi32(int __i) return _mm_set_pi32(__i, __i); } -/// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the +/// Constructs a 64-bit integer vector of [4 x i16], with each of the /// 16-bit integer vector elements set to the specified 16-bit integer /// value. /// @@ -1413,7 +1413,7 @@ _mm_set1_pi16(short __w) return _mm_set_pi16(__w, __w, __w, __w); } -/// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the +/// Constructs a 64-bit integer vector of [8 x i8], with each of the /// 8-bit integer vector elements set to the specified 8-bit integer value. /// /// \headerfile @@ -1431,7 +1431,7 @@ _mm_set1_pi8(char __b) return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); } -/// \brief Constructs a 64-bit integer vector, initialized in reverse order with +/// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 32-bit integer values. /// /// \headerfile @@ -1452,7 +1452,7 @@ _mm_setr_pi32(int __i0, int __i1) return _mm_set_pi32(__i1, __i0); } -/// \brief Constructs a 64-bit integer vector, initialized in reverse order with +/// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 16-bit integer values. /// /// \headerfile @@ -1475,7 +1475,7 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) return _mm_set_pi16(__w3, __w2, __w1, __w0); } -/// \brief Constructs a 64-bit integer vector, initialized in reverse order with +/// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 8-bit integer values. 
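The one functional change in this stretch is `_mm_setzero_si64` gaining `__extension__`, presumably to silence `-pedantic` diagnostics about the compound-literal initializer. As documented, the `setr` constructors simply reverse the argument order of the `set` forms; a sketch (not from the diff):

```c
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm_set_pi32 takes arguments high-to-low, _mm_setr_pi32 low-to-high */
    long long s = _mm_cvtm64_si64(_mm_set_pi32(0x11111111, 0x22222222));
    long long r = _mm_cvtm64_si64(_mm_setr_pi32(0x11111111, 0x22222222));
    _mm_empty();
    printf("set =%016llx\nsetr=%016llx\n",
           (unsigned long long)s, (unsigned long long)r);
    /* set  = 1111111122222222, setr = 2222222211111111 */
    return 0;
}
```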
/// /// \headerfile diff --git a/lib/clang/7.0.0/include/module.modulemap b/lib/clang/8.0.0/include/module.modulemap similarity index 91% rename from lib/clang/7.0.0/include/module.modulemap rename to lib/clang/8.0.0/include/module.modulemap index 961d8dd..1d1af57 100644 --- a/lib/clang/7.0.0/include/module.modulemap +++ b/lib/clang/8.0.0/include/module.modulemap @@ -63,6 +63,17 @@ module _Builtin_intrinsics [system] [extern_c] { textual header "fma4intrin.h" textual header "mwaitxintrin.h" textual header "clzerointrin.h" + textual header "wbnoinvdintrin.h" + textual header "cldemoteintrin.h" + textual header "waitpkgintrin.h" + textual header "movdirintrin.h" + textual header "pconfigintrin.h" + textual header "sgxintrin.h" + textual header "ptwriteintrin.h" + textual header "invpcidintrin.h" + + textual header "__wmmintrin_aes.h" + textual header "__wmmintrin_pclmul.h" explicit module mm_malloc { requires !freestanding @@ -129,14 +140,6 @@ module _Builtin_intrinsics [system] [extern_c] { export aes export pclmul } - - explicit module aes { - header "__wmmintrin_aes.h" - } - - explicit module pclmul { - header "__wmmintrin_pclmul.h" - } } explicit module systemz { diff --git a/lib/clang/8.0.0/include/movdirintrin.h b/lib/clang/8.0.0/include/movdirintrin.h new file mode 100644 index 0000000..ec20c53 --- /dev/null +++ b/lib/clang/8.0.0/include/movdirintrin.h @@ -0,0 +1,63 @@ +/*===------------------------- movdirintrin.h ------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef _MOVDIRINTRIN_H +#define _MOVDIRINTRIN_H + +/* Move doubleword as direct store */ +static __inline__ void +__attribute__((__always_inline__, __nodebug__, __target__("movdiri"))) +_directstoreu_u32 (void *__dst, unsigned int __value) +{ + __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value); +} + +#ifdef __x86_64__ + +/* Move quadword as direct store */ +static __inline__ void +__attribute__((__always_inline__, __nodebug__, __target__("movdiri"))) +_directstoreu_u64 (void *__dst, unsigned long __value) +{ + __builtin_ia32_directstore_u64((unsigned long *)__dst, __value); +} + +#endif /* __x86_64__ */ + +/* + * movdir64b - Move 64 bytes as direct store. + * The destination must be 64 byte aligned, and the store is atomic. 
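In the modulemap, the new 8.0.0 headers are registered as textual headers and the AES/PCLMUL submodules collapse into textual headers as well. `movdirintrin.h` is one of the new files; a compile-only sketch of its direct-store intrinsic (assumes `-mmovdiri` and supporting hardware; not from the diff):

```c
#include <immintrin.h>

/* movdiri issues a direct store that bypasses the normal write-allocate
 * path; a typical use is a doorbell/descriptor write toward a device. */
void ring_doorbell(unsigned int *doorbell, unsigned int seq) {
    _directstoreu_u32(doorbell, seq);
}
```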
+ * The source address has no alignment requirement, and the load from + * the source address is not atomic. + */ +static __inline__ void +__attribute__((__always_inline__, __nodebug__, __target__("movdir64b"))) +_movdir64b (void *__dst __attribute__((align_value(64))), const void *__src) +{ + __builtin_ia32_movdir64b(__dst, __src); +} + +#endif /* _MOVDIRINTRIN_H */ diff --git a/lib/clang/7.0.0/include/msa.h b/lib/clang/8.0.0/include/msa.h similarity index 100% rename from lib/clang/7.0.0/include/msa.h rename to lib/clang/8.0.0/include/msa.h diff --git a/lib/clang/7.0.0/include/mwaitxintrin.h b/lib/clang/8.0.0/include/mwaitxintrin.h similarity index 95% rename from lib/clang/7.0.0/include/mwaitxintrin.h rename to lib/clang/8.0.0/include/mwaitxintrin.h index 635f2ac..2921ead 100644 --- a/lib/clang/7.0.0/include/mwaitxintrin.h +++ b/lib/clang/8.0.0/include/mwaitxintrin.h @@ -25,8 +25,8 @@ #error "Never use directly; include instead." #endif -#ifndef _MWAITXINTRIN_H -#define _MWAITXINTRIN_H +#ifndef __MWAITXINTRIN_H +#define __MWAITXINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mwaitx"))) @@ -44,4 +44,4 @@ _mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock) #undef __DEFAULT_FN_ATTRS -#endif /* _MWAITXINTRIN_H */ +#endif /* __MWAITXINTRIN_H */ diff --git a/lib/clang/7.0.0/include/nmmintrin.h b/lib/clang/8.0.0/include/nmmintrin.h similarity index 95% rename from lib/clang/7.0.0/include/nmmintrin.h rename to lib/clang/8.0.0/include/nmmintrin.h index 57fec15..348fb8c 100644 --- a/lib/clang/7.0.0/include/nmmintrin.h +++ b/lib/clang/8.0.0/include/nmmintrin.h @@ -21,10 +21,10 @@ *===-----------------------------------------------------------------------=== */ -#ifndef _NMMINTRIN_H -#define _NMMINTRIN_H +#ifndef __NMMINTRIN_H +#define __NMMINTRIN_H /* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h, just include it now then. */ #include -#endif /* _NMMINTRIN_H */ +#endif /* __NMMINTRIN_H */ diff --git a/lib/clang/7.0.0/include/opencl-c.h b/lib/clang/8.0.0/include/opencl-c.h similarity index 99% rename from lib/clang/7.0.0/include/opencl-c.h rename to lib/clang/8.0.0/include/opencl-c.h index e648b0f..d1d9c10 100644 --- a/lib/clang/7.0.0/include/opencl-c.h +++ b/lib/clang/8.0.0/include/opencl-c.h @@ -11540,7 +11540,7 @@ half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c); * * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)). * - * The address computed as (p + (offset * n)) must be + * The address computed as (p + (offset * n)) must be * 8-bit aligned if gentype is char, uchar; * 16-bit aligned if gentype is short, ushort, half; * 32-bit aligned if gentype is int, uint, float; @@ -12888,7 +12888,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags); cl_mem_fence_flags __ovld get_fence(const void *ptr); cl_mem_fence_flags __ovld get_fence(void *ptr); -/** +/** * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions * and checked in Sema since they should be declared as * addr gentype* to_addr (gentype*); @@ -13773,7 +13773,7 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long opera // add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t. // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. 
-#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); @@ -14462,7 +14462,7 @@ half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask); #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf -int printf(__constant const char* st, ...); +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); #endif // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions @@ -14571,7 +14571,7 @@ int printf(__constant const char* st, ...); * only. The filter_mode specified in sampler * must be set to CLK_FILTER_NEAREST; otherwise * the values returned are undefined. - + * The read_image{f|i|ui} calls that take * integer coordinates must use a sampler with * normalized coordinates set to diff --git a/lib/clang/8.0.0/include/pconfigintrin.h b/lib/clang/8.0.0/include/pconfigintrin.h new file mode 100644 index 0000000..fee3cad --- /dev/null +++ b/lib/clang/8.0.0/include/pconfigintrin.h @@ -0,0 +1,50 @@ +/*===---- pconfigintrin.h - X86 platform configuration ---------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __PCONFIGINTRIN_H +#define __PCONFIGINTRIN_H + +#define __PCONFIG_KEY_PROGRAM 0x00000001 + +/* Define the default attributes for the functions in this file. 
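A change worth flagging earlier in this hunk: OpenCL's `printf` declaration gains `__attribute__((format(printf, 1, 2)))`, so `-Wformat` can now check the variadic arguments against the format string. The same pattern in plain C (`log_msg` is a hypothetical function for illustration):

```c
#include <stdarg.h>
#include <stdio.h>

__attribute__((format(printf, 1, 2)))   /* arg 1 is the format, args start at 2 */
static void log_msg(const char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
}

/* log_msg("%s", 42); now warns: format specifies 'char *' but argument is 'int' */
```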
*/ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("pconfig"))) + +static __inline unsigned int __DEFAULT_FN_ATTRS +_pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) +{ + unsigned int __result; + __asm__ ("pconfig" + : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) + : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) + : "cc"); + return __result; +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/clang/7.0.0/include/pkuintrin.h b/lib/clang/8.0.0/include/pkuintrin.h similarity index 93% rename from lib/clang/7.0.0/include/pkuintrin.h rename to lib/clang/8.0.0/include/pkuintrin.h index 9e54594..6976924 100644 --- a/lib/clang/7.0.0/include/pkuintrin.h +++ b/lib/clang/8.0.0/include/pkuintrin.h @@ -1,4 +1,4 @@ -/*===------------- pkuintrin.h - PKU intrinsics ------------------=== +/*===---- pkuintrin.h - PKU intrinsics -------------------------------------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -40,7 +40,7 @@ _rdpkru_u32(void) static __inline__ void __DEFAULT_FN_ATTRS _wrpkru(unsigned int __val) { - return __builtin_ia32_wrpkru(__val); + __builtin_ia32_wrpkru(__val); } #undef __DEFAULT_FN_ATTRS diff --git a/lib/clang/7.0.0/include/pmmintrin.h b/lib/clang/8.0.0/include/pmmintrin.h similarity index 90% rename from lib/clang/7.0.0/include/pmmintrin.h rename to lib/clang/8.0.0/include/pmmintrin.h index 7ec08a1..7e1a9ea 100644 --- a/lib/clang/7.0.0/include/pmmintrin.h +++ b/lib/clang/8.0.0/include/pmmintrin.h @@ -28,9 +28,9 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("sse3"))) + __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128))) -/// \brief Loads data from an unaligned memory location to elements in a 128-bit +/// Loads data from an unaligned memory location to elements in a 128-bit /// vector. /// /// If the address of the data is not 16-byte aligned, the instruction may @@ -50,7 +50,7 @@ _mm_lddqu_si128(__m128i const *__p) return (__m128i)__builtin_ia32_lddqu((char const *)__p); } -/// \brief Adds the even-indexed values and subtracts the odd-indexed values of +/// Adds the even-indexed values and subtracts the odd-indexed values of /// two 128-bit vectors of [4 x float]. /// /// \headerfile @@ -69,7 +69,7 @@ _mm_addsub_ps(__m128 __a, __m128 __b) return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); } -/// \brief Horizontally adds the adjacent pairs of values contained in two +/// Horizontally adds the adjacent pairs of values contained in two /// 128-bit vectors of [4 x float]. /// /// \headerfile @@ -92,7 +92,7 @@ _mm_hadd_ps(__m128 __a, __m128 __b) return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in two +/// Horizontally subtracts the adjacent pairs of values contained in two /// 128-bit vectors of [4 x float]. /// /// \headerfile @@ -115,7 +115,7 @@ _mm_hsub_ps(__m128 __a, __m128 __b) return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } -/// \brief Moves and duplicates odd-indexed values from a 128-bit vector +/// Moves and duplicates odd-indexed values from a 128-bit vector /// of [4 x float] to float values stored in a 128-bit vector of /// [4 x float]. 
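In `pkuintrin.h`, `_wrpkru` stops `return`ing a void expression (returning an expression from a void function draws pedantic-mode diagnostics in C); behavior is unchanged. A usage sketch (assumes `-mpku` and a kernel that has enabled protection keys, otherwise RDPKRU/WRPKRU fault; not from the diff):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned int pkru = _rdpkru_u32();  /* read the access-rights register */
    _wrpkru(pkru);                      /* write it back; void after this fix */
    printf("PKRU = %08x\n", pkru);
    return 0;
}
```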
/// @@ -137,7 +137,7 @@ _mm_movehdup_ps(__m128 __a) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); } -/// \brief Duplicates even-indexed values from a 128-bit vector of +/// Duplicates even-indexed values from a 128-bit vector of /// [4 x float] to float values stored in a 128-bit vector of [4 x float]. /// /// \headerfile @@ -158,7 +158,7 @@ _mm_moveldup_ps(__m128 __a) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); } -/// \brief Adds the even-indexed values and subtracts the odd-indexed values of +/// Adds the even-indexed values and subtracts the odd-indexed values of /// two 128-bit vectors of [2 x double]. /// /// \headerfile @@ -177,7 +177,7 @@ _mm_addsub_pd(__m128d __a, __m128d __b) return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } -/// \brief Horizontally adds the pairs of values contained in two 128-bit +/// Horizontally adds the pairs of values contained in two 128-bit /// vectors of [2 x double]. /// /// \headerfile @@ -200,7 +200,7 @@ _mm_hadd_pd(__m128d __a, __m128d __b) return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } -/// \brief Horizontally subtracts the pairs of values contained in two 128-bit +/// Horizontally subtracts the pairs of values contained in two 128-bit /// vectors of [2 x double]. /// /// \headerfile @@ -223,13 +223,13 @@ _mm_hsub_pd(__m128d __a, __m128d __b) return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } -/// \brief Moves and duplicates one double-precision value to double-precision +/// Moves and duplicates one double-precision value to double-precision /// values stored in a 128-bit vector of [2 x double]. /// /// \headerfile /// /// \code -/// __m128d _mm_loaddup_pd(double const * dp); +/// __m128d _mm_loaddup_pd(double const *dp); /// \endcode /// /// This intrinsic corresponds to the VMOVDDUP instruction. @@ -240,7 +240,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b) /// duplicated values. #define _mm_loaddup_pd(dp) _mm_load1_pd(dp) -/// \brief Moves and duplicates the double-precision value in the lower bits of +/// Moves and duplicates the double-precision value in the lower bits of /// a 128-bit vector of [2 x double] to double-precision values stored in a /// 128-bit vector of [2 x double]. /// @@ -259,7 +259,7 @@ _mm_movedup_pd(__m128d __a) return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } -/// \brief Establishes a linear address memory range to be monitored and puts +/// Establishes a linear address memory range to be monitored and puts /// the processor in the monitor event pending state. Data stored in the /// monitored address range causes the processor to exit the pending state. /// @@ -280,7 +280,7 @@ _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) __builtin_ia32_monitor((void *)__p, __extensions, __hints); } -/// \brief Used with the MONITOR instruction to wait while the processor is in +/// Used with the MONITOR instruction to wait while the processor is in /// the monitor event pending state. Data stored in the monitored address /// range causes the processor to exit the pending state. 
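Beyond the `\brief` cleanup, every `pmmintrin.h` function gains `__min_vector_width__(128)`, a hint the x86 backend consults when honoring `-mprefer-vector-width`. The horizontal adds documented here sum adjacent lanes; a sketch (needs `-msse3`; not from the diff):

```c
#include <pmmintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  /* lanes low..high: 1, 2, 3, 4 */
    __m128 h = _mm_hadd_ps(a, a);                   /* [a0+a1, a2+a3, a0+a1, a2+a3] */
    float out[4];
    _mm_storeu_ps(out, h);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 3 7 3 7 */
    return 0;
}
```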
/// diff --git a/lib/clang/7.0.0/include/popcntintrin.h b/lib/clang/8.0.0/include/popcntintrin.h similarity index 88% rename from lib/clang/7.0.0/include/popcntintrin.h rename to lib/clang/8.0.0/include/popcntintrin.h index 0b4793e..75ceab9 100644 --- a/lib/clang/7.0.0/include/popcntintrin.h +++ b/lib/clang/8.0.0/include/popcntintrin.h @@ -21,13 +21,13 @@ *===-----------------------------------------------------------------------=== */ -#ifndef _POPCNTINTRIN_H -#define _POPCNTINTRIN_H +#ifndef __POPCNTINTRIN_H +#define __POPCNTINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt"))) -/// \brief Counts the number of bits in the source operand having a value of 1. +/// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// @@ -43,7 +43,7 @@ _mm_popcnt_u32(unsigned int __A) return __builtin_popcount(__A); } -/// \brief Counts the number of bits in the source operand having a value of 1. +/// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// @@ -60,7 +60,7 @@ _popcnt32(int __A) } #ifdef __x86_64__ -/// \brief Counts the number of bits in the source operand having a value of 1. +/// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// @@ -76,7 +76,7 @@ _mm_popcnt_u64(unsigned long long __A) return __builtin_popcountll(__A); } -/// \brief Counts the number of bits in the source operand having a value of 1. +/// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// @@ -95,4 +95,4 @@ _popcnt64(long long __A) #undef __DEFAULT_FN_ATTRS -#endif /* _POPCNTINTRIN_H */ +#endif /* __POPCNTINTRIN_H */ diff --git a/lib/clang/7.0.0/include/prfchwintrin.h b/lib/clang/8.0.0/include/prfchwintrin.h similarity index 92% rename from lib/clang/7.0.0/include/prfchwintrin.h rename to lib/clang/8.0.0/include/prfchwintrin.h index b52f31d..7085139 100644 --- a/lib/clang/7.0.0/include/prfchwintrin.h +++ b/lib/clang/8.0.0/include/prfchwintrin.h @@ -28,8 +28,7 @@ #ifndef __PRFCHWINTRIN_H #define __PRFCHWINTRIN_H -#if defined(__PRFCHW__) || defined(__3dNOW__) -/// \brief Loads a memory sequence containing the specified memory address into +/// Loads a memory sequence containing the specified memory address into /// all data cache levels. The cache-coherency state is set to exclusive. /// Data can be read from and written to the cache line without additional /// delay. @@ -46,7 +45,7 @@ _m_prefetch(void *__P) __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */); } -/// \brief Loads a memory sequence containing the specified memory address into +/// Loads a memory sequence containing the specified memory address into /// the L1 data cache and sets the cache-coherency to modified. This /// provides a hint to the processor that the cache line will be modified. 
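`popcntintrin.h` moves to the reserved `__POPCNTINTRIN_H` guard spelling, matching the other headers. A usage sketch (needs `-mpopcnt`; not from the diff):

```c
#include <popcntintrin.h>
#include <stdio.h>

int main(void) {
    printf("%d\n", _mm_popcnt_u32(0xF0F0u));             /* 8 set bits */
    printf("%d\n", (int)_mm_popcnt_u64(0xFFFFFFFFULL));  /* 32 set bits */
    return 0;
}
```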
/// It is intended for use when the cache line will be written to shortly @@ -66,6 +65,5 @@ _m_prefetchw(void *__P) { __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); } -#endif #endif /* __PRFCHWINTRIN_H */ diff --git a/lib/clang/8.0.0/include/ptwriteintrin.h b/lib/clang/8.0.0/include/ptwriteintrin.h new file mode 100644 index 0000000..1bb1df0 --- /dev/null +++ b/lib/clang/8.0.0/include/ptwriteintrin.h @@ -0,0 +1,51 @@ +/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __PTWRITEINTRIN_H +#define __PTWRITEINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("ptwrite"))) + +static __inline__ void __DEFAULT_FN_ATTRS +_ptwrite32(unsigned int __value) { + __builtin_ia32_ptwrite32(__value); +} + +#ifdef __x86_64__ + +static __inline__ void __DEFAULT_FN_ATTRS +_ptwrite64(unsigned long long __value) { + __builtin_ia32_ptwrite64(__value); +} + +#endif /* __x86_64__ */ + +#undef __DEFAULT_FN_ATTRS + +#endif /* __PTWRITEINTRIN_H */ diff --git a/lib/clang/7.0.0/include/rdseedintrin.h b/lib/clang/8.0.0/include/rdseedintrin.h similarity index 97% rename from lib/clang/7.0.0/include/rdseedintrin.h rename to lib/clang/8.0.0/include/rdseedintrin.h index 421f4ea..4194669 100644 --- a/lib/clang/7.0.0/include/rdseedintrin.h +++ b/lib/clang/8.0.0/include/rdseedintrin.h @@ -21,7 +21,7 @@ *===-----------------------------------------------------------------------=== */ -#ifndef __X86INTRIN_H +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." 
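With the `#if defined(__PRFCHW__) || defined(__3dNOW__)` guard removed, `_m_prefetch` and `_m_prefetchw` are always declared, which is harmless since both lower to `__builtin_prefetch`. A sketch (not from the diff):

```c
#include <x86intrin.h>

struct node { struct node *next; long hits; };

/* warm the next node for writing while we update the current one */
void bump(struct node *n) {
    _m_prefetchw(n->next);  /* lowers to __builtin_prefetch(p, 1, 3): write intent */
    n->hits++;
}
```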
#endif diff --git a/lib/clang/7.0.0/include/rtmintrin.h b/lib/clang/8.0.0/include/rtmintrin.h similarity index 100% rename from lib/clang/7.0.0/include/rtmintrin.h rename to lib/clang/8.0.0/include/rtmintrin.h diff --git a/lib/clang/7.0.0/include/s390intrin.h b/lib/clang/8.0.0/include/s390intrin.h similarity index 100% rename from lib/clang/7.0.0/include/s390intrin.h rename to lib/clang/8.0.0/include/s390intrin.h diff --git a/lib/clang/7.0.0/include/sanitizer/allocator_interface.h b/lib/clang/8.0.0/include/sanitizer/allocator_interface.h similarity index 99% rename from lib/clang/7.0.0/include/sanitizer/allocator_interface.h rename to lib/clang/8.0.0/include/sanitizer/allocator_interface.h index 89f3283..e44c4a1 100644 --- a/lib/clang/7.0.0/include/sanitizer/allocator_interface.h +++ b/lib/clang/8.0.0/include/sanitizer/allocator_interface.h @@ -82,7 +82,6 @@ extern "C" { Currently available with ASan only. */ void __sanitizer_purge_allocator(void); - #ifdef __cplusplus } // extern "C" #endif diff --git a/lib/clang/7.0.0/include/sanitizer/asan_interface.h b/lib/clang/8.0.0/include/sanitizer/asan_interface.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/asan_interface.h rename to lib/clang/8.0.0/include/sanitizer/asan_interface.h diff --git a/lib/clang/7.0.0/include/sanitizer/common_interface_defs.h b/lib/clang/8.0.0/include/sanitizer/common_interface_defs.h similarity index 94% rename from lib/clang/7.0.0/include/sanitizer/common_interface_defs.h rename to lib/clang/8.0.0/include/sanitizer/common_interface_defs.h index 6d4326f..bf015eb 100644 --- a/lib/clang/7.0.0/include/sanitizer/common_interface_defs.h +++ b/lib/clang/8.0.0/include/sanitizer/common_interface_defs.h @@ -65,6 +65,11 @@ extern "C" { void __sanitizer_unaligned_store32(void *p, uint32_t x); void __sanitizer_unaligned_store64(void *p, uint64_t x); + // Returns 1 on the first call, then returns 0 thereafter. Called by the tool + // to ensure only one report is printed when multiple errors occur + // simultaneously. + int __sanitizer_acquire_crash_state(); + // Annotate the current state of a contiguous container, such as // std::vector, std::string or similar. // A contiguous container is a container that keeps all of its elements @@ -119,6 +124,12 @@ extern "C" { // Symbolizes the supplied 'pc' using the format string 'fmt'. // Outputs at most 'out_buf_size' bytes into 'out_buf'. + // If 'out_buf' is not empty then output is zero or more non empty C strings + // followed by single empty C string. Multiple strings can be returned if PC + // corresponds to inlined function. Inlined frames are printed in the order + // from "most-inlined" to the "least-inlined", so the last frame should be the + // not inlined function. + // Inlined frames can be removed with 'symbolize_inline_frames=0'. // The format syntax is described in // lib/sanitizer_common/sanitizer_stacktrace_printer.h. 
void __sanitizer_symbolize_pc(void *pc, const char *fmt, char *out_buf, diff --git a/lib/clang/7.0.0/include/sanitizer/coverage_interface.h b/lib/clang/8.0.0/include/sanitizer/coverage_interface.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/coverage_interface.h rename to lib/clang/8.0.0/include/sanitizer/coverage_interface.h diff --git a/lib/clang/7.0.0/include/sanitizer/dfsan_interface.h b/lib/clang/8.0.0/include/sanitizer/dfsan_interface.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/dfsan_interface.h rename to lib/clang/8.0.0/include/sanitizer/dfsan_interface.h diff --git a/lib/clang/7.0.0/include/sanitizer/esan_interface.h b/lib/clang/8.0.0/include/sanitizer/esan_interface.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/esan_interface.h rename to lib/clang/8.0.0/include/sanitizer/esan_interface.h diff --git a/lib/clang/8.0.0/include/sanitizer/hwasan_interface.h b/lib/clang/8.0.0/include/sanitizer/hwasan_interface.h new file mode 100644 index 0000000..1affd87 --- /dev/null +++ b/lib/clang/8.0.0/include/sanitizer/hwasan_interface.h @@ -0,0 +1,84 @@ +//===-- sanitizer/asan_interface.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of HWAddressSanitizer. +// +// Public interface header. +//===----------------------------------------------------------------------===// +#ifndef SANITIZER_HWASAN_INTERFACE_H +#define SANITIZER_HWASAN_INTERFACE_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + // Initialize shadow but not the rest of the runtime. + // Does not call libc unless there is an error. + // Can be called multiple times, or not at all (in which case shadow will + // be initialized in compiler-inserted __hwasan_init() call). + void __hwasan_shadow_init(void); + + // This function may be optionally provided by user and should return + // a string containing HWASan runtime options. See asan_flags.h for details. + const char* __hwasan_default_options(void); + + void __hwasan_enable_allocator_tagging(void); + void __hwasan_disable_allocator_tagging(void); + + // Mark region of memory with the given tag. Both address and size need to be + // 16-byte aligned. + void __hwasan_tag_memory(const volatile void *p, unsigned char tag, + size_t size); + + /// Set pointer tag. Previous tag is lost. + void *__hwasan_tag_pointer(const volatile void *p, unsigned char tag); + + // Set memory tag from the current SP address to the given address to zero. + // This is meant to annotate longjmp and other non-local jumps. + // This function needs to know the (almost) exact destination frame address; + // clearing shadow for the entire thread stack like __asan_handle_no_return + // does would cause false reports. + void __hwasan_handle_longjmp(const void *sp_dst); + + // Libc hook for thread creation. Should be called in the child thread before + // any instrumented code. + void __hwasan_thread_enter(); + + // Libc hook for thread destruction. No instrumented code should run after + // this call. + void __hwasan_thread_exit(); + + // Print shadow and origin for the memory range to stderr in a human-readable + // format. + void __hwasan_print_shadow(const volatile void *x, size_t size); + + // Print one-line report about the memory usage of the current process. 
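`hwasan_interface.h` is new in 8.0.0 (its header comment still reads `sanitizer/asan_interface.h`, an upstream copy-paste quirk visible above). A compile-only sketch of the tagging hooks, meaningful only under `-fsanitize=hwaddress` on a supported target such as AArch64 (not from the diff):

```c
#include <stddef.h>
#include <sanitizer/hwasan_interface.h>

/* give a region a fresh tag and return a pointer carrying the same tag */
void *retag(void *p, size_t size, unsigned char tag) {
    __hwasan_tag_memory(p, tag, size);    /* p and size must be 16-byte aligned */
    return __hwasan_tag_pointer(p, tag);  /* previous pointer tag is lost */
}
```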
+ void __hwasan_print_memory_usage(); + + int __sanitizer_posix_memalign(void **memptr, size_t alignment, size_t size); + void * __sanitizer_memalign(size_t alignment, size_t size); + void * __sanitizer_aligned_alloc(size_t alignment, size_t size); + void * __sanitizer___libc_memalign(size_t alignment, size_t size); + void * __sanitizer_valloc(size_t size); + void * __sanitizer_pvalloc(size_t size); + void __sanitizer_free(void *ptr); + void __sanitizer_cfree(void *ptr); + size_t __sanitizer_malloc_usable_size(const void *ptr); + struct mallinfo __sanitizer_mallinfo(); + int __sanitizer_mallopt(int cmd, int value); + void __sanitizer_malloc_stats(void); + void * __sanitizer_calloc(size_t nmemb, size_t size); + void * __sanitizer_realloc(void *ptr, size_t size); + void * __sanitizer_malloc(size_t size); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // SANITIZER_HWASAN_INTERFACE_H diff --git a/lib/clang/7.0.0/include/sanitizer/linux_syscall_hooks.h b/lib/clang/8.0.0/include/sanitizer/linux_syscall_hooks.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/linux_syscall_hooks.h rename to lib/clang/8.0.0/include/sanitizer/linux_syscall_hooks.h diff --git a/lib/clang/7.0.0/include/sanitizer/lsan_interface.h b/lib/clang/8.0.0/include/sanitizer/lsan_interface.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/lsan_interface.h rename to lib/clang/8.0.0/include/sanitizer/lsan_interface.h diff --git a/lib/clang/7.0.0/include/sanitizer/msan_interface.h b/lib/clang/8.0.0/include/sanitizer/msan_interface.h similarity index 93% rename from lib/clang/7.0.0/include/sanitizer/msan_interface.h rename to lib/clang/8.0.0/include/sanitizer/msan_interface.h index a87c954..0509551 100644 --- a/lib/clang/7.0.0/include/sanitizer/msan_interface.h +++ b/lib/clang/8.0.0/include/sanitizer/msan_interface.h @@ -104,6 +104,14 @@ extern "C" { copy. Source and destination regions can overlap. */ void __msan_copy_shadow(const volatile void *dst, const volatile void *src, size_t size); + + /* Disables uninitialized memory checks in interceptors. */ + void __msan_scoped_disable_interceptor_checks(void); + + /* Re-enables uninitialized memory checks in interceptors after a previous + call to __msan_scoped_disable_interceptor_checks. */ + void __msan_scoped_enable_interceptor_checks(void); + #ifdef __cplusplus } // extern "C" #endif diff --git a/lib/clang/7.0.0/include/sanitizer/netbsd_syscall_hooks.h b/lib/clang/8.0.0/include/sanitizer/netbsd_syscall_hooks.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/netbsd_syscall_hooks.h rename to lib/clang/8.0.0/include/sanitizer/netbsd_syscall_hooks.h diff --git a/lib/clang/7.0.0/include/sanitizer/scudo_interface.h b/lib/clang/8.0.0/include/sanitizer/scudo_interface.h similarity index 85% rename from lib/clang/7.0.0/include/sanitizer/scudo_interface.h rename to lib/clang/8.0.0/include/sanitizer/scudo_interface.h index 25979fb..be605f1 100644 --- a/lib/clang/7.0.0/include/sanitizer/scudo_interface.h +++ b/lib/clang/8.0.0/include/sanitizer/scudo_interface.h @@ -27,6 +27,11 @@ extern "C" { // can be removed by setting LimitMb to 0. This function's parameters should // be fully trusted to avoid security mishaps. void __scudo_set_rss_limit(size_t LimitMb, int HardLimit); + + // This function outputs various allocator statistics for both the Primary + // and Secondary allocators, including memory usage, number of allocations + // and deallocations. 
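`msan_interface.h` gains a scoped escape hatch for interceptor checks. A sketch (only meaningful in a binary built with `-fsanitize=memory`; `parse_blob` stands in for a hypothetical uninstrumented helper):

```c
#include <stddef.h>
#include <sanitizer/msan_interface.h>

extern void parse_blob(const char *buf, size_t n);  /* hypothetical helper */

void call_uninstrumented_parser(const char *buf, size_t n) {
    /* suppress MSan's interceptor checks (memcpy, strlen, ...) around a
     * known-noisy call, then restore them */
    __msan_scoped_disable_interceptor_checks();
    parse_blob(buf, n);
    __msan_scoped_enable_interceptor_checks();
}
```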
+ void __scudo_print_stats(void); #ifdef __cplusplus } // extern "C" #endif diff --git a/lib/clang/7.0.0/include/sanitizer/tsan_interface.h b/lib/clang/8.0.0/include/sanitizer/tsan_interface.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/tsan_interface.h rename to lib/clang/8.0.0/include/sanitizer/tsan_interface.h diff --git a/lib/clang/7.0.0/include/sanitizer/tsan_interface_atomic.h b/lib/clang/8.0.0/include/sanitizer/tsan_interface_atomic.h similarity index 100% rename from lib/clang/7.0.0/include/sanitizer/tsan_interface_atomic.h rename to lib/clang/8.0.0/include/sanitizer/tsan_interface_atomic.h diff --git a/lib/clang/8.0.0/include/sgxintrin.h b/lib/clang/8.0.0/include/sgxintrin.h new file mode 100644 index 0000000..20aee76 --- /dev/null +++ b/lib/clang/8.0.0/include/sgxintrin.h @@ -0,0 +1,70 @@ +/*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __SGXINTRIN_H +#define __SGXINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("sgx"))) + +static __inline unsigned int __DEFAULT_FN_ATTRS +_enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) +{ + unsigned int __result; + __asm__ ("enclu" + : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) + : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) + : "cc"); + return __result; +} + +static __inline unsigned int __DEFAULT_FN_ATTRS +_encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) +{ + unsigned int __result; + __asm__ ("encls" + : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) + : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) + : "cc"); + return __result; +} + +static __inline unsigned int __DEFAULT_FN_ATTRS +_enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) +{ + unsigned int __result; + __asm__ ("enclv" + : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) + : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) + : "cc"); + return __result; +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/clang/7.0.0/include/shaintrin.h b/lib/clang/8.0.0/include/shaintrin.h similarity index 95% rename from lib/clang/7.0.0/include/shaintrin.h rename to lib/clang/8.0.0/include/shaintrin.h index 9b5d218..3df4718 100644 --- a/lib/clang/7.0.0/include/shaintrin.h +++ b/lib/clang/8.0.0/include/shaintrin.h @@ -29,10 +29,10 @@ #define __SHAINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"), __min_vector_width__(128))) -#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \ - __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)); }) +#define _mm_sha1rnds4_epu32(V1, V2, M) \ + __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1nexte_epu32(__m128i __X, __m128i __Y) diff --git a/lib/clang/7.0.0/include/smmintrin.h b/lib/clang/8.0.0/include/smmintrin.h similarity index 89% rename from lib/clang/7.0.0/include/smmintrin.h rename to lib/clang/8.0.0/include/smmintrin.h index e02775c..4806b3e 100644 --- a/lib/clang/7.0.0/include/smmintrin.h +++ b/lib/clang/8.0.0/include/smmintrin.h @@ -21,13 +21,13 @@ *===-----------------------------------------------------------------------=== */ -#ifndef _SMMINTRIN_H -#define _SMMINTRIN_H +#ifndef __SMMINTRIN_H +#define __SMMINTRIN_H #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), __min_vector_width__(128))) /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 @@ -46,7 +46,7 @@ #define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) #define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) -/// \brief Rounds up each element of the 128-bit vector of [4 x float] to an +/// Rounds up each element of the 128-bit vector of [4 x float] to an /// integer and returns the rounded values in a 128-bit vector of /// [4 x float]. /// @@ -63,7 +63,7 @@ /// \returns A 128-bit vector of [4 x float] containing the rounded values. 
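The three SGX dispatchers are identical inline-asm wrappers: the leaf number goes in EAX and `__d[0..2]` map to RBX/RCX/RDX both in and out. A compile-only sketch (assumes `-msgx`; leaf 0 is EREPORT per the Intel SDM and only executes inside an enclave, so this is illustration only):

```c
#include <immintrin.h>

/* EREPORT: RBX = TARGETINFO, RCX = REPORTDATA, RDX = output REPORT buffer */
unsigned int get_report(void *tgtinfo, void *reportdata, void *report_out) {
    __SIZE_TYPE__ regs[3] = { (__SIZE_TYPE__)tgtinfo,
                              (__SIZE_TYPE__)reportdata,
                              (__SIZE_TYPE__)report_out };
    return _enclu_u32(0, regs);
}
```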
#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) -/// \brief Rounds up each element of the 128-bit vector of [2 x double] to an +/// Rounds up each element of the 128-bit vector of [2 x double] to an /// integer and returns the rounded values in a 128-bit vector of /// [2 x double]. /// @@ -80,7 +80,7 @@ /// \returns A 128-bit vector of [2 x double] containing the rounded values. #define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) -/// \brief Copies three upper elements of the first 128-bit vector operand to +/// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds up the lowest element of the second 128-bit vector /// operand to an integer and copies it to the lowest element of the 128-bit @@ -105,7 +105,7 @@ /// values. #define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) -/// \brief Copies the upper element of the first 128-bit vector operand to the +/// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. /// Rounds up the lower element of the second 128-bit vector operand to an /// integer and copies it to the lower element of the 128-bit result vector @@ -130,7 +130,7 @@ /// values. #define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) -/// \brief Rounds down each element of the 128-bit vector of [4 x float] to an +/// Rounds down each element of the 128-bit vector of [4 x float] to an /// an integer and returns the rounded values in a 128-bit vector of /// [4 x float]. /// @@ -147,7 +147,7 @@ /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) -/// \brief Rounds down each element of the 128-bit vector of [2 x double] to an +/// Rounds down each element of the 128-bit vector of [2 x double] to an /// integer and returns the rounded values in a 128-bit vector of /// [2 x double]. /// @@ -164,7 +164,7 @@ /// \returns A 128-bit vector of [2 x double] containing the rounded values. #define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) -/// \brief Copies three upper elements of the first 128-bit vector operand to +/// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds down the lowest element of the second 128-bit vector /// operand to an integer and copies it to the lowest element of the 128-bit @@ -189,7 +189,7 @@ /// values. #define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) -/// \brief Copies the upper element of the first 128-bit vector operand to the +/// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. /// Rounds down the lower element of the second 128-bit vector operand to an /// integer and copies it to the lower element of the 128-bit result vector @@ -214,7 +214,7 @@ /// values. #define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) -/// \brief Rounds each element of the 128-bit vector of [4 x float] to an +/// Rounds each element of the 128-bit vector of [4 x float] to an /// integer value according to the rounding control specified by the second /// argument and returns the rounded values in a 128-bit vector of /// [4 x float]. 
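Since the `_mm_ceil_*`/`_mm_floor_*` macros above are just fixed-mode spellings of `_mm_round_*`, a tiny usage sketch (the function name is made up; compile with `-msse4.1`):

```c
#include <smmintrin.h>

/* Round each lane of v both ways; the macros expand to _mm_round_ps
 * with _MM_FROUND_FLOOR and _MM_FROUND_CEIL respectively. */
static __m128 ceil_and_floor(__m128 v, __m128 *floored)
{
    *floored = _mm_floor_ps(v); /* toward negative infinity */
    return _mm_ceil_ps(v);      /* toward positive infinity */
}
```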
@@ -244,10 +244,10 @@ /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the rounded values. -#define _mm_round_ps(X, M) __extension__ ({ \ - (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); }) +#define _mm_round_ps(X, M) \ + (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)) -/// \brief Copies three upper elements of the first 128-bit vector operand to +/// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds the lowest element of the second 128-bit vector /// operand to an integer value according to the rounding control specified @@ -285,11 +285,11 @@ /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the copied and rounded /// values. -#define _mm_round_ss(X, Y, M) __extension__ ({ \ +#define _mm_round_ss(X, Y, M) \ (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (M)); }) + (__v4sf)(__m128)(Y), (M)) -/// \brief Rounds each element of the 128-bit vector of [2 x double] to an +/// Rounds each element of the 128-bit vector of [2 x double] to an /// integer value according to the rounding control specified by the second /// argument and returns the rounded values in a 128-bit vector of /// [2 x double]. @@ -319,10 +319,10 @@ /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [2 x double] containing the rounded values. -#define _mm_round_pd(X, M) __extension__ ({ \ - (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); }) +#define _mm_round_pd(X, M) \ + (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)) -/// \brief Copies the upper element of the first 128-bit vector operand to the +/// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. /// Rounds the lower element of the second 128-bit vector operand to an /// integer value according to the rounding control specified by the third @@ -360,12 +360,12 @@ /// 11: Truncated /// \returns A 128-bit vector of [2 x double] containing the copied and rounded /// values. -#define _mm_round_sd(X, Y, M) __extension__ ({ \ +#define _mm_round_sd(X, Y, M) \ (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (M)); }) + (__v2df)(__m128d)(Y), (M)) /* SSE4 Packed Blending Intrinsics. */ -/// \brief Returns a 128-bit vector of [2 x double] where the values are +/// Returns a 128-bit vector of [2 x double] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. /// @@ -389,13 +389,11 @@ /// When a mask bit is 1, the corresponding 64-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [2 x double] containing the copied values. -#define _mm_blend_pd(V1, V2, M) __extension__ ({ \ - (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \ - (__v2df)(__m128d)(V2), \ - (((M) & 0x01) ? 2 : 0), \ - (((M) & 0x02) ? 3 : 1)); }) +#define _mm_blend_pd(V1, V2, M) \ + (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \ + (__v2df)(__m128d)(V2), (int)(M)) -/// \brief Returns a 128-bit vector of [4 x float] where the values are selected +/// Returns a 128-bit vector of [4 x float] where the values are selected /// from either the first or second operand as specified by the third /// operand, the control mask. 
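The `_mm_blend_pd` rewrite above (and the `_mm_blend_ps`/`_mm_blend_epi16` hunks that follow) switches from generic `__builtin_shufflevector` statement expressions to the dedicated blend builtins, which also check that the mask is an immediate. A usage sketch (`blend_even_odd` is a made-up name):

```c
#include <smmintrin.h>

/* Mask bit i = 1 selects lane i from b, bit i = 0 keeps lane i of a;
 * 0xA = 0b1010 takes lanes 1 and 3 from b and lanes 0 and 2 from a. */
static __m128 blend_even_odd(__m128 a, __m128 b)
{
    return _mm_blend_ps(a, b, 0xA);
}
```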
/// @@ -419,14 +417,11 @@ /// When a mask bit is 1, the corresponding 32-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. -#define _mm_blend_ps(V1, V2, M) __extension__ ({ \ - (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \ - (((M) & 0x01) ? 4 : 0), \ - (((M) & 0x02) ? 5 : 1), \ - (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 7 : 3)); }) - -/// \brief Returns a 128-bit vector of [2 x double] where the values are +#define _mm_blend_ps(V1, V2, M) \ + (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \ + (__v4sf)(__m128)(V2), (int)(M)) + +/// Returns a 128-bit vector of [2 x double] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. /// @@ -453,7 +448,7 @@ _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) (__v2df)__M); } -/// \brief Returns a 128-bit vector of [4 x float] where the values are +/// Returns a 128-bit vector of [4 x float] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. /// @@ -480,7 +475,7 @@ _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) (__v4sf)__M); } -/// \brief Returns a 128-bit vector of [16 x i8] where the values are selected +/// Returns a 128-bit vector of [16 x i8] where the values are selected /// from either of the first or second operand as specified by the third /// operand, the control mask. /// @@ -493,7 +488,7 @@ _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) /// \param __V2 /// A 128-bit vector of [16 x i8]. /// \param __M -/// A 128-bit vector operand, with mask bits 127, 119, 111 ... 7 specifying +/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying /// how the values are to be copied. The position of the mask bit corresponds /// to the most significant bit of a copied value. When a mask bit is 0, the /// corresponding 8-bit element in operand \a __V1 is copied to the same @@ -507,7 +502,7 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) (__v16qi)__M); } -/// \brief Returns a 128-bit vector of [8 x i16] where the values are selected +/// Returns a 128-bit vector of [8 x i16] where the values are selected /// from either of the first or second operand as specified by the third /// operand, the control mask. /// @@ -531,20 +526,12 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) /// When a mask bit is 1, the corresponding 16-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [8 x i16] containing the copied values. -#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ - (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \ - (__v8hi)(__m128i)(V2), \ - (((M) & 0x01) ? 8 : 0), \ - (((M) & 0x02) ? 9 : 1), \ - (((M) & 0x04) ? 10 : 2), \ - (((M) & 0x08) ? 11 : 3), \ - (((M) & 0x10) ? 12 : 4), \ - (((M) & 0x20) ? 13 : 5), \ - (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 15 : 7)); }) +#define _mm_blend_epi16(V1, V2, M) \ + (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \ + (__v8hi)(__m128i)(V2), (int)(M)) /* SSE4 Dword Multiply Instructions. */ -/// \brief Multiples corresponding elements of two 128-bit vectors of [4 x i32] +/// Multiples corresponding elements of two 128-bit vectors of [4 x i32] /// and returns the lower 32 bits of the each product in a 128-bit vector of /// [4 x i32]. 
/// @@ -563,7 +550,7 @@ _mm_mullo_epi32 (__m128i __V1, __m128i __V2) return (__m128i) ((__v4su)__V1 * (__v4su)__V2); } -/// \brief Multiplies corresponding even-indexed elements of two 128-bit +/// Multiplies corresponding even-indexed elements of two 128-bit /// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64] /// containing the products. /// @@ -584,7 +571,7 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2) } /* SSE4 Floating Point Dot Product Instructions. */ -/// \brief Computes the dot product of the two 128-bit vectors of [4 x float] +/// Computes the dot product of the two 128-bit vectors of [4 x float] /// and returns it in the elements of the 128-bit result vector of /// [4 x float]. /// @@ -616,11 +603,11 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2) /// each [4 x float] subvector. If a bit is set, the dot product is returned /// in the corresponding element; otherwise that element is set to zero. /// \returns A 128-bit vector of [4 x float] containing the dot product. -#define _mm_dp_ps(X, Y, M) __extension__ ({ \ +#define _mm_dp_ps(X, Y, M) \ (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (M)); }) + (__v4sf)(__m128)(Y), (M)) -/// \brief Computes the dot product of the two 128-bit vectors of [2 x double] +/// Computes the dot product of the two 128-bit vectors of [2 x double] /// and returns it in the elements of the 128-bit result vector of /// [2 x double]. /// @@ -651,12 +638,12 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2) /// to the lowest element and bit [1] corresponding to the highest element of /// each [2 x double] vector. If a bit is set, the dot product is returned in /// the corresponding element; otherwise that element is set to zero. -#define _mm_dp_pd(X, Y, M) __extension__ ({\ +#define _mm_dp_pd(X, Y, M) \ (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (M)); }) + (__v2df)(__m128d)(Y), (M)) /* SSE4 Streaming Load Hint Instruction. */ -/// \brief Loads integer values from a 128-bit aligned memory location to a +/// Loads integer values from a 128-bit aligned memory location to a /// 128-bit integer vector. /// /// \headerfile @@ -675,7 +662,7 @@ _mm_stream_load_si128 (__m128i const *__V) } /* SSE4 Packed Integer Min/Max Instructions. */ -/// \brief Compares the corresponding elements of two 128-bit vectors of +/// Compares the corresponding elements of two 128-bit vectors of /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser /// of the two values. /// @@ -694,7 +681,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2) return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); } -/// \brief Compares the corresponding elements of two 128-bit vectors of +/// Compares the corresponding elements of two 128-bit vectors of /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the /// greater value of the two. /// @@ -713,7 +700,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2) return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); } -/// \brief Compares the corresponding elements of two 128-bit vectors of +/// Compares the corresponding elements of two 128-bit vectors of /// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser /// value of the two. 
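The `_mm_dp_ps` doc comment above describes two controls packed into one immediate; a worked example may help (`dot4` is a made-up name):

```c
#include <smmintrin.h>

/* 0xF1: high nibble 0xF multiplies all four lane pairs, low nibble 0x1
 * broadcasts the accumulated sum into lane 0 only (other lanes are zero). */
static float dot4(__m128 a, __m128 b)
{
    return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xF1));
}
```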
/// @@ -732,7 +719,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2) return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); } -/// \brief Compares the corresponding elements of two 128-bit vectors of +/// Compares the corresponding elements of two 128-bit vectors of /// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the /// greater value of the two. /// @@ -751,7 +738,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2) return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); } -/// \brief Compares the corresponding elements of two 128-bit vectors of +/// Compares the corresponding elements of two 128-bit vectors of /// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser /// value of the two. /// @@ -770,7 +757,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2) return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); } -/// \brief Compares the corresponding elements of two 128-bit vectors of +/// Compares the corresponding elements of two 128-bit vectors of /// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the /// greater value of the two. /// @@ -789,7 +776,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2) return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); } -/// \brief Compares the corresponding elements of two 128-bit vectors of +/// Compares the corresponding elements of two 128-bit vectors of /// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser /// value of the two. /// @@ -808,7 +795,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2) return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); } -/// \brief Compares the corresponding elements of two 128-bit vectors of +/// Compares the corresponding elements of two 128-bit vectors of /// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the /// greater value of the two. /// @@ -828,7 +815,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) } /* SSE4 Insertion and Extraction from XMM Register Instructions. */ -/// \brief Takes the first argument \a X and inserts an element from the second +/// Takes the first argument \a X and inserts an element from the second /// argument \a Y as selected by the third argument \a N. That result then /// has elements zeroed out also as selected by the third argument \a N. The /// resulting 128-bit vector of [4 x float] is then returned. @@ -870,7 +857,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// single-precision floating point elements from the operands. #define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) -/// \brief Extracts a 32-bit integer from a 128-bit vector of [4 x float] and +/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and /// returns it, using the immediate value parameter \a N as a selector. /// /// \headerfile @@ -893,15 +880,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 11: Bits [127:96] of parameter \a X are returned. /// \returns A 32-bit integer containing the extracted 32 bits of float data. #define _mm_extract_ps(X, N) (__extension__ \ - ({ union { int __i; float __f; } __t; \ - __v4sf __a = (__v4sf)(__m128)(X); \ - __t.__f = __a[(N) & 3]; \ - __t.__i;})) + ({ union { int __i; float __f; } __t; \ + __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \ + __t.__i;})) /* Miscellaneous insert and extract macros. */ /* Extract a single-precision float from X at index N into D. 
*/ -#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \ - (D) = __a[N]; })) +#define _MM_EXTRACT_FLOAT(D, X, N) \ + { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); } /* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create an index suitable for _mm_insert_ps. */ @@ -912,7 +898,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) /* Insert int into packed integer array at index. */ -/// \brief Constructs a 128-bit vector of [16 x i8] by first making a copy of +/// Constructs a 128-bit vector of [16 x i8] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the lower 8 bits /// of an integer parameter \a I into an offset specified by the immediate /// value parameter \a N. @@ -952,12 +938,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 1110: Bits [119:112] of the result are used for insertion. \n /// 1111: Bits [127:120] of the result are used for insertion. /// \returns A 128-bit integer vector containing the constructed values. -#define _mm_insert_epi8(X, I, N) (__extension__ \ - ({ __v16qi __a = (__v16qi)(__m128i)(X); \ - __a[(N) & 15] = (I); \ - (__m128i)__a;})) +#define _mm_insert_epi8(X, I, N) \ + (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \ + (int)(I), (int)(N)) -/// \brief Constructs a 128-bit vector of [4 x i32] by first making a copy of +/// Constructs a 128-bit vector of [4 x i32] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the 32-bit /// integer parameter \a I at the offset specified by the immediate value /// parameter \a N. @@ -985,13 +970,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 10: Bits [95:64] of the result are used for insertion. \n /// 11: Bits [127:96] of the result are used for insertion. /// \returns A 128-bit integer vector containing the constructed values. -#define _mm_insert_epi32(X, I, N) (__extension__ \ - ({ __v4si __a = (__v4si)(__m128i)(X); \ - __a[(N) & 3] = (I); \ - (__m128i)__a;})) +#define _mm_insert_epi32(X, I, N) \ + (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \ + (int)(I), (int)(N)) #ifdef __x86_64__ -/// \brief Constructs a 128-bit vector of [2 x i64] by first making a copy of +/// Constructs a 128-bit vector of [2 x i64] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the 64-bit /// integer parameter \a I, using the immediate value parameter \a N as an /// insertion location selector. @@ -1017,16 +1001,15 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 0: Bits [63:0] of the result are used for insertion. \n /// 1: Bits [127:64] of the result are used for insertion. \n /// \returns A 128-bit integer vector containing the constructed values. -#define _mm_insert_epi64(X, I, N) (__extension__ \ - ({ __v2di __a = (__v2di)(__m128i)(X); \ - __a[(N) & 1] = (I); \ - (__m128i)__a;})) +#define _mm_insert_epi64(X, I, N) \ + (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \ + (long long)(I), (int)(N)) #endif /* __x86_64__ */ /* Extract int from packed integer array at index. This returns the element * as a zero extended value, so it is unsigned. */ -/// \brief Extracts an 8-bit element from the 128-bit integer vector of +/// Extracts an 8-bit element from the 128-bit integer vector of /// [16 x i8], using the immediate value parameter \a N as a selector. 
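The insert/extract rework in these hunks replaces the old vector-subscript statement expressions with `vec_set`/`vec_ext` builtins, so the index has to be a compile-time constant. A sketch (`set_lane2`/`get_lane2` are made-up names):

```c
#include <smmintrin.h>

static __m128i set_lane2(__m128i v, int x)
{
    return _mm_insert_epi32(v, x, 2); /* writes bits [95:64] */
}

static int get_lane2(__m128i v)
{
    return _mm_extract_epi32(v, 2);   /* reads bits [95:64] */
}
```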
/// /// \headerfile @@ -1061,11 +1044,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// \returns An unsigned integer, whose lower 8 bits are selected from the /// 128-bit integer vector parameter and the remaining bits are assigned /// zeros. -#define _mm_extract_epi8(X, N) (__extension__ \ - ({ __v16qi __a = (__v16qi)(__m128i)(X); \ - (int)(unsigned char) __a[(N) & 15];})) +#define _mm_extract_epi8(X, N) \ + (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \ + (int)(N)) -/// \brief Extracts a 32-bit element from the 128-bit integer vector of +/// Extracts a 32-bit element from the 128-bit integer vector of /// [4 x i32], using the immediate value parameter \a N as a selector. /// /// \headerfile @@ -1087,12 +1070,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 11: Bits [127:96] of the parameter \a X are exracted. /// \returns An integer, whose lower 32 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. -#define _mm_extract_epi32(X, N) (__extension__ \ - ({ __v4si __a = (__v4si)(__m128i)(X); \ - (int)__a[(N) & 3];})) +#define _mm_extract_epi32(X, N) \ + (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N)) #ifdef __x86_64__ -/// \brief Extracts a 64-bit element from the 128-bit integer vector of +/// Extracts a 64-bit element from the 128-bit integer vector of /// [2 x i64], using the immediate value parameter \a N as a selector. /// /// \headerfile @@ -1111,13 +1093,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 0: Bits [63:0] are returned. \n /// 1: Bits [127:64] are returned. \n /// \returns A 64-bit integer. -#define _mm_extract_epi64(X, N) (__extension__ \ - ({ __v2di __a = (__v2di)(__m128i)(X); \ - (long long)__a[(N) & 1];})) +#define _mm_extract_epi64(X, N) \ + (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N)) #endif /* __x86_64 */ /* SSE4 128-bit Packed Integer Comparisons. */ -/// \brief Tests whether the specified bits in a 128-bit integer vector are all +/// Tests whether the specified bits in a 128-bit integer vector are all /// zeros. /// /// \headerfile @@ -1135,7 +1116,7 @@ _mm_testz_si128(__m128i __M, __m128i __V) return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); } -/// \brief Tests whether the specified bits in a 128-bit integer vector are all +/// Tests whether the specified bits in a 128-bit integer vector are all /// ones. /// /// \headerfile @@ -1153,7 +1134,7 @@ _mm_testc_si128(__m128i __M, __m128i __V) return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); } -/// \brief Tests whether the specified bits in a 128-bit integer vector are +/// Tests whether the specified bits in a 128-bit integer vector are /// neither all zeros nor all ones. /// /// \headerfile @@ -1172,7 +1153,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V) return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); } -/// \brief Tests whether the specified bits in a 128-bit integer vector are all +/// Tests whether the specified bits in a 128-bit integer vector are all /// ones. /// /// \headerfile @@ -1189,7 +1170,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V) /// otherwise. #define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) -/// \brief Tests whether the specified bits in a 128-bit integer vector are +/// Tests whether the specified bits in a 128-bit integer vector are /// neither all zeros nor all ones. /// /// \headerfile @@ -1208,7 +1189,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V) /// FALSE otherwise. 
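For the `PTEST` family above, a small sketch (`is_zero_under_mask` is a made-up name): `_mm_test_all_zeros` is just `_mm_testz_si128`, which reports whether the AND of its two operands is all zeros.

```c
#include <smmintrin.h>

/* Returns 1 when no bit of v survives the mask m, else 0. */
static int is_zero_under_mask(__m128i m, __m128i v)
{
    return _mm_test_all_zeros(m, v);
}
```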
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) -/// \brief Tests whether the specified bits in a 128-bit integer vector are all +/// Tests whether the specified bits in a 128-bit integer vector are all /// zeros. /// /// \headerfile @@ -1227,7 +1208,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V) #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) /* SSE4 64-bit Packed Integer Comparisons. */ -/// \brief Compares each of the corresponding 64-bit values of the 128-bit +/// Compares each of the corresponding 64-bit values of the 128-bit /// integer vectors for equality. /// /// \headerfile @@ -1246,7 +1227,7 @@ _mm_cmpeq_epi64(__m128i __V1, __m128i __V2) } /* SSE4 Packed Integer Sign-Extension. */ -/// \brief Sign-extends each of the lower eight 8-bit integer elements of a +/// Sign-extends each of the lower eight 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector /// are unused. @@ -1267,7 +1248,7 @@ _mm_cvtepi8_epi16(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); } -/// \brief Sign-extends each of the lower four 8-bit integer elements of a +/// Sign-extends each of the lower four 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a /// 128-bit vector of [4 x i32]. The upper twelve elements of the input /// vector are unused. @@ -1277,8 +1258,8 @@ _mm_cvtepi8_epi16(__m128i __V) /// This intrinsic corresponds to the VPMOVSXBD / PMOVSXBD instruction. /// /// \param __V -/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are sign- -/// extended to 32-bit values. +/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are +/// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) @@ -1288,7 +1269,7 @@ _mm_cvtepi8_epi32(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si); } -/// \brief Sign-extends each of the lower two 8-bit integer elements of a +/// Sign-extends each of the lower two 8-bit integer elements of a /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input /// vector are unused. @@ -1298,8 +1279,8 @@ _mm_cvtepi8_epi32(__m128i __V) /// This intrinsic corresponds to the VPMOVSXBQ / PMOVSXBQ instruction. /// /// \param __V -/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are sign- -/// extended to 64-bit values. +/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are +/// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) @@ -1309,7 +1290,7 @@ _mm_cvtepi8_epi64(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di); } -/// \brief Sign-extends each of the lower four 16-bit integer elements of a +/// Sign-extends each of the lower four 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in /// a 128-bit vector of [4 x i32]. The upper four elements of the input /// vector are unused. 
@@ -1319,8 +1300,8 @@ _mm_cvtepi8_epi64(__m128i __V) /// This intrinsic corresponds to the VPMOVSXWD / PMOVSXWD instruction. /// /// \param __V -/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are sign- -/// extended to 32-bit values. +/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are +/// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) @@ -1328,7 +1309,7 @@ _mm_cvtepi16_epi32(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); } -/// \brief Sign-extends each of the lower two 16-bit integer elements of a +/// Sign-extends each of the lower two 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper six elements of the input /// vector are unused. @@ -1338,8 +1319,8 @@ _mm_cvtepi16_epi32(__m128i __V) /// This intrinsic corresponds to the VPMOVSXWQ / PMOVSXWQ instruction. /// /// \param __V -/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are sign- -/// extended to 64-bit values. +/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are +/// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) @@ -1347,7 +1328,7 @@ _mm_cvtepi16_epi64(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); } -/// \brief Sign-extends each of the lower two 32-bit integer elements of a +/// Sign-extends each of the lower two 32-bit integer elements of a /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector /// are unused. @@ -1357,8 +1338,8 @@ _mm_cvtepi16_epi64(__m128i __V) /// This intrinsic corresponds to the VPMOVSXDQ / PMOVSXDQ instruction. /// /// \param __V -/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are sign- -/// extended to 64-bit values. +/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are +/// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) @@ -1367,7 +1348,7 @@ _mm_cvtepi32_epi64(__m128i __V) } /* SSE4 Packed Integer Zero-Extension. */ -/// \brief Zero-extends each of the lower eight 8-bit integer elements of a +/// Zero-extends each of the lower eight 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector /// are unused. @@ -1377,8 +1358,8 @@ _mm_cvtepi32_epi64(__m128i __V) /// This intrinsic corresponds to the VPMOVZXBW / PMOVZXBW instruction. /// /// \param __V -/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are zero- -/// extended to 16-bit values. +/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are +/// zero-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the zero-extended values. 
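The sign- and zero-extension intrinsics being re-wrapped here pair up naturally; a sketch contrasting the two (`widen_low8` is a made-up name):

```c
#include <smmintrin.h>

/* Widen the low eight bytes of v to 16-bit lanes twice over:
 * once signed (PMOVSXBW), once unsigned (PMOVZXBW). */
static void widen_low8(__m128i v, __m128i *s16, __m128i *u16)
{
    *s16 = _mm_cvtepi8_epi16(v);
    *u16 = _mm_cvtepu8_epi16(v);
}
```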
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) @@ -1386,7 +1367,7 @@ _mm_cvtepu8_epi16(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); } -/// \brief Zero-extends each of the lower four 8-bit integer elements of a +/// Zero-extends each of the lower four 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a /// 128-bit vector of [4 x i32]. The upper twelve elements of the input /// vector are unused. @@ -1396,8 +1377,8 @@ _mm_cvtepu8_epi16(__m128i __V) /// This intrinsic corresponds to the VPMOVZXBD / PMOVZXBD instruction. /// /// \param __V -/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are zero- -/// extended to 32-bit values. +/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are +/// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) @@ -1405,7 +1386,7 @@ _mm_cvtepu8_epi32(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); } -/// \brief Zero-extends each of the lower two 8-bit integer elements of a +/// Zero-extends each of the lower two 8-bit integer elements of a /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input /// vector are unused. @@ -1415,8 +1396,8 @@ _mm_cvtepu8_epi32(__m128i __V) /// This intrinsic corresponds to the VPMOVZXBQ / PMOVZXBQ instruction. /// /// \param __V -/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are zero- -/// extended to 64-bit values. +/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are +/// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) @@ -1424,7 +1405,7 @@ _mm_cvtepu8_epi64(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); } -/// \brief Zero-extends each of the lower four 16-bit integer elements of a +/// Zero-extends each of the lower four 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in /// a 128-bit vector of [4 x i32]. The upper four elements of the input /// vector are unused. @@ -1434,8 +1415,8 @@ _mm_cvtepu8_epi64(__m128i __V) /// This intrinsic corresponds to the VPMOVZXWD / PMOVZXWD instruction. /// /// \param __V -/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are zero- -/// extended to 32-bit values. +/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are +/// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) @@ -1443,7 +1424,7 @@ _mm_cvtepu16_epi32(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); } -/// \brief Zero-extends each of the lower two 16-bit integer elements of a +/// Zero-extends each of the lower two 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. 
The upper six elements of the input vector /// are unused. @@ -1453,8 +1434,8 @@ _mm_cvtepu16_epi32(__m128i __V) /// This intrinsic corresponds to the VPMOVZXWQ / PMOVZXWQ instruction. /// /// \param __V -/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are zero- -/// extended to 64-bit values. +/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are +/// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) @@ -1462,7 +1443,7 @@ _mm_cvtepu16_epi64(__m128i __V) return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); } -/// \brief Zero-extends each of the lower two 32-bit integer elements of a +/// Zero-extends each of the lower two 32-bit integer elements of a /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector /// are unused. @@ -1472,8 +1453,8 @@ _mm_cvtepu16_epi64(__m128i __V) /// This intrinsic corresponds to the VPMOVZXDQ / PMOVZXDQ instruction. /// /// \param __V -/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are zero- -/// extended to 64-bit values. +/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are +/// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) @@ -1482,7 +1463,7 @@ _mm_cvtepu32_epi64(__m128i __V) } /* SSE4 Pack with Unsigned Saturation. */ -/// \brief Converts 32-bit signed integers from both 128-bit integer vector +/// Converts 32-bit signed integers from both 128-bit integer vector /// operands into 16-bit unsigned integers, and returns the packed result. /// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than /// 0x0000 are saturated to 0x0000. @@ -1511,7 +1492,7 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) } /* SSE4 Multiple Packed Sums of Absolute Difference. */ -/// \brief Subtracts 8-bit unsigned integer values and computes the absolute +/// Subtracts 8-bit unsigned integer values and computes the absolute /// values of the differences to the corresponding bits in the destination. /// Then sums of the absolute differences are returned according to the bit /// fields in the immediate operand. @@ -1534,23 +1515,23 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) /// \code /// // M2 represents bit 2 of the immediate operand /// // M10 represents bits [1:0] of the immediate operand -/// i = M2 * 4 -/// j = M10 * 4 +/// i = M2 * 4; +/// j = M10 * 4; /// for (k = 0; k < 8; k = k + 1) { -/// d0 = abs(X[i + k + 0] - Y[j + 0]) -/// d1 = abs(X[i + k + 1] - Y[j + 1]) -/// d2 = abs(X[i + k + 2] - Y[j + 2]) -/// d3 = abs(X[i + k + 3] - Y[j + 3]) -/// r[k] = d0 + d1 + d2 + d3 +/// d0 = abs(X[i + k + 0] - Y[j + 0]); +/// d1 = abs(X[i + k + 1] - Y[j + 1]); +/// d2 = abs(X[i + k + 2] - Y[j + 2]); +/// d3 = abs(X[i + k + 3] - Y[j + 3]); +/// r[k] = d0 + d1 + d2 + d3; /// } /// \endcode /// \returns A 128-bit integer vector containing the sums of the sets of /// absolute differences between both operands. 
-#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \ +#define _mm_mpsadbw_epu8(X, Y, M) \ (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \ - (__v16qi)(__m128i)(Y), (M)); }) + (__v16qi)(__m128i)(Y), (M)) -/// \brief Finds the minimum unsigned 16-bit element in the input 128-bit +/// Finds the minimum unsigned 16-bit element in the input 128-bit /// vector of [8 x u16] and returns it and along with its index. /// /// \headerfile @@ -1604,7 +1585,7 @@ _mm_minpos_epu16(__m128i __V) #define _SIDD_UNIT_MASK 0x40 /* SSE4.2 Packed Comparison Intrinsics. */ -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns a 128-bit integer vector representing the result /// mask of the comparison. @@ -1660,7 +1641,7 @@ _mm_minpos_epu16(__m128i __V) (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns an integer representing the result index of the /// comparison. @@ -1714,7 +1695,7 @@ _mm_minpos_epu16(__m128i __V) (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns a 128-bit integer vector representing the result /// mask of the comparison. @@ -1775,7 +1756,7 @@ _mm_minpos_epu16(__m128i __V) (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns an integer representing the result index of the /// comparison. @@ -1835,7 +1816,7 @@ _mm_minpos_epu16(__m128i __V) (int)(M)) /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the /// string in \a B is the maximum, otherwise, returns 0. @@ -1885,7 +1866,7 @@ _mm_minpos_epu16(__m128i __V) (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the bit mask is non-zero, otherwise, returns /// 0. 
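The `_SIDD_*` flags and the `pcmpistri` wrappers in these hunks compose like this (a sketch only; `first_digit` is a made-up name, build with `-msse4.2`):

```c
#include <smmintrin.h>

/* Index within a 16-byte block of the first byte that belongs to the
 * set "0123456789", or 16 if none does. Implicit lengths stop at NUL. */
static int first_digit(const char block[16])
{
    const __m128i set  = _mm_loadu_si128((const __m128i *)"0123456789\0\0\0\0\0");
    const __m128i data = _mm_loadu_si128((const __m128i *)block);
    return _mm_cmpistri(set, data, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
}
```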
@@ -1934,7 +1915,7 @@ _mm_minpos_epu16(__m128i __V) (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns bit 0 of the resulting bit mask. /// @@ -1982,7 +1963,7 @@ _mm_minpos_epu16(__m128i __V) (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a A is less than /// the maximum, otherwise, returns 0. @@ -2032,7 +2013,7 @@ _mm_minpos_epu16(__m128i __V) (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a B is less than /// the maximum, otherwise, returns 0. @@ -2082,7 +2063,7 @@ _mm_minpos_epu16(__m128i __V) (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the /// string in \a B is the maximum, otherwise, returns 0. @@ -2137,7 +2118,7 @@ _mm_minpos_epu16(__m128i __V) (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise, /// returns 0. @@ -2191,7 +2172,7 @@ _mm_minpos_epu16(__m128i __V) (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns bit 0 of the resulting bit mask. /// @@ -2244,7 +2225,7 @@ _mm_minpos_epu16(__m128i __V) (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a A is less than /// the maximum, otherwise, returns 0. @@ -2299,7 +2280,7 @@ _mm_minpos_epu16(__m128i __V) (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M)) -/// \brief Uses the immediate operand \a M to perform a comparison of string +/// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. 
Returns 1 if the length of the string in \a B is less than /// the maximum, otherwise, returns 0. @@ -2354,7 +2335,7 @@ _mm_minpos_epu16(__m128i __V) (int)(M)) /* SSE4.2 Compare Packed Data -- Greater Than. */ -/// \brief Compares each of the corresponding 64-bit values of the 128-bit +/// Compares each of the corresponding 64-bit values of the 128-bit /// integer vectors to determine if the values in the first operand are /// greater than those in the second operand. /// @@ -2374,7 +2355,7 @@ _mm_cmpgt_epi64(__m128i __V1, __m128i __V2) } /* SSE4.2 Accumulate CRC32. */ -/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the +/// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned char operand. /// /// \headerfile <x86intrin.h> @@ -2394,7 +2375,7 @@ _mm_crc32_u8(unsigned int __C, unsigned char __D) return __builtin_ia32_crc32qi(__C, __D); } -/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the +/// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned short operand. /// /// \headerfile <x86intrin.h> @@ -2414,7 +2395,7 @@ _mm_crc32_u16(unsigned int __C, unsigned short __D) return __builtin_ia32_crc32hi(__C, __D); } -/// \brief Adds the first unsigned integer operand to the CRC-32C checksum of +/// Adds the first unsigned integer operand to the CRC-32C checksum of /// the second unsigned integer operand. /// /// \headerfile <x86intrin.h> @@ -2435,7 +2416,7 @@ _mm_crc32_u32(unsigned int __C, unsigned int __D) } #ifdef __x86_64__ -/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the +/// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned 64-bit integer operand. /// /// \headerfile <x86intrin.h> @@ -2458,8 +2439,6 @@ _mm_crc32_u64(unsigned long long __C, unsigned long long __D) #undef __DEFAULT_FN_ATTRS -#ifdef __POPCNT__ #include <popcntintrin.h> -#endif -#endif /* _SMMINTRIN_H */ +#endif /* __SMMINTRIN_H */ diff --git a/lib/clang/7.0.0/include/stdalign.h b/lib/clang/8.0.0/include/stdalign.h similarity index 100% rename from lib/clang/7.0.0/include/stdalign.h rename to lib/clang/8.0.0/include/stdalign.h diff --git a/lib/clang/7.0.0/include/stdarg.h b/lib/clang/8.0.0/include/stdarg.h similarity index 100% rename from lib/clang/7.0.0/include/stdarg.h rename to lib/clang/8.0.0/include/stdarg.h diff --git a/lib/clang/7.0.0/include/stdatomic.h b/lib/clang/8.0.0/include/stdatomic.h similarity index 100% rename from lib/clang/7.0.0/include/stdatomic.h rename to lib/clang/8.0.0/include/stdatomic.h diff --git a/lib/clang/7.0.0/include/stdbool.h b/lib/clang/8.0.0/include/stdbool.h similarity index 100% rename from lib/clang/7.0.0/include/stdbool.h rename to lib/clang/8.0.0/include/stdbool.h diff --git a/lib/clang/7.0.0/include/stddef.h b/lib/clang/8.0.0/include/stddef.h similarity index 100% rename from lib/clang/7.0.0/include/stddef.h rename to lib/clang/8.0.0/include/stddef.h diff --git a/lib/clang/7.0.0/include/stdint.h b/lib/clang/8.0.0/include/stdint.h similarity index 100% rename from lib/clang/7.0.0/include/stdint.h rename to lib/clang/8.0.0/include/stdint.h diff --git a/lib/clang/7.0.0/include/stdnoreturn.h b/lib/clang/8.0.0/include/stdnoreturn.h similarity index 100% rename from lib/clang/7.0.0/include/stdnoreturn.h rename to lib/clang/8.0.0/include/stdnoreturn.h diff --git a/lib/clang/7.0.0/include/tbmintrin.h b/lib/clang/8.0.0/include/tbmintrin.h similarity index 100% rename from lib/clang/7.0.0/include/tbmintrin.h rename to lib/clang/8.0.0/include/tbmintrin.h diff --git a/lib/clang/7.0.0/include/tgmath.h
b/lib/clang/8.0.0/include/tgmath.h similarity index 100% rename from lib/clang/7.0.0/include/tgmath.h rename to lib/clang/8.0.0/include/tgmath.h diff --git a/lib/clang/7.0.0/include/tmmintrin.h b/lib/clang/8.0.0/include/tmmintrin.h similarity index 89% rename from lib/clang/7.0.0/include/tmmintrin.h rename to lib/clang/8.0.0/include/tmmintrin.h index 97d6148..734cd39 100644 --- a/lib/clang/7.0.0/include/tmmintrin.h +++ b/lib/clang/8.0.0/include/tmmintrin.h @@ -27,9 +27,10 @@ #include <pmmintrin.h> /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64))) -/// \brief Computes the absolute value of each of the packed 8-bit signed +/// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer /// results in the destination. /// @@ -41,13 +42,13 @@ /// A 64-bit vector of [8 x i8]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi8(__m64 __a) { return (__m64)__builtin_ia32_pabsb((__v8qi)__a); } -/// \brief Computes the absolute value of each of the packed 8-bit signed +/// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer /// results in the destination. /// @@ -65,7 +66,7 @@ _mm_abs_epi8(__m128i __a) return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); } -/// \brief Computes the absolute value of each of the packed 16-bit signed +/// Computes the absolute value of each of the packed 16-bit signed /// integers in the source operand and stores the 16-bit unsigned integer /// results in the destination. /// @@ -77,13 +78,13 @@ _mm_abs_epi8(__m128i __a) /// A 64-bit vector of [4 x i16]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi16(__m64 __a) { return (__m64)__builtin_ia32_pabsw((__v4hi)__a); } -/// \brief Computes the absolute value of each of the packed 16-bit signed +/// Computes the absolute value of each of the packed 16-bit signed /// integers in the source operand and stores the 16-bit unsigned integer /// results in the destination. /// @@ -101,7 +102,7 @@ _mm_abs_epi16(__m128i __a) return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); } -/// \brief Computes the absolute value of each of the packed 32-bit signed +/// Computes the absolute value of each of the packed 32-bit signed /// integers in the source operand and stores the 32-bit unsigned integer /// results in the destination. /// @@ -113,13 +114,13 @@ _mm_abs_epi16(__m128i __a) /// A 64-bit vector of [2 x i32]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi32(__m64 __a) { return (__m64)__builtin_ia32_pabsd((__v2si)__a); } -/// \brief Computes the absolute value of each of the packed 32-bit signed +/// Computes the absolute value of each of the packed 32-bit signed /// integers in the source operand and stores the 32-bit unsigned integer /// results in the destination. /// @@ -137,7 +138,7 @@ _mm_abs_epi32(__m128i __a) return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); } -/// \brief Concatenates the two 128-bit integer vector operands, and +/// Concatenates the two 128-bit integer vector operands, and /// right-shifts the result by the number of bytes specified in the immediate /// operand. /// @@ -157,11 +158,11 @@ _mm_abs_epi32(__m128i __a) /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 128-bit integer vector containing the concatenated right-shifted /// value. -#define _mm_alignr_epi8(a, b, n) __extension__ ({ \ +#define _mm_alignr_epi8(a, b, n) \ (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (n)); }) + (__v16qi)(__m128i)(b), (n)) -/// \brief Concatenates the two 64-bit integer vector operands, and right-shifts +/// Concatenates the two 64-bit integer vector operands, and right-shifts /// the result by the number of bytes specified in the immediate operand. /// /// \headerfile @@ -180,10 +181,10 @@ _mm_abs_epi32(__m128i __a) /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 64-bit integer vector containing the concatenated right-shifted /// value. -#define _mm_alignr_pi8(a, b, n) __extension__ ({ \ - (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); }) +#define _mm_alignr_pi8(a, b, n) \ + (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)) -/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. /// /// \headerfile @@ -206,7 +207,7 @@ _mm_hadd_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [4 x i32]. /// /// \headerfile @@ -229,7 +230,7 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } -/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [4 x i16]. /// /// \headerfile @@ -246,13 +247,13 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); } -/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [2 x i32]. /// /// \headerfile @@ -269,13 +270,13 @@ _mm_hadd_pi16(__m64 __a, __m64 __b) /// destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. 
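The practical effect of the new `__DEFAULT_FN_ATTRS_MMX` split in `tmmintrin.h` is that only the `__m64` forms now demand the `mmx` feature; the 128-bit forms stay SSSE3-only. A sketch using two of them (`sum_pairs_abs` is a made-up name, build with `-mssse3`):

```c
#include <tmmintrin.h>

/* Lane-wise absolute values, then horizontal pairwise sums. */
static __m128i sum_pairs_abs(__m128i a, __m128i b)
{
    return _mm_hadd_epi16(_mm_abs_epi16(a), _mm_abs_epi16(b));
}
```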
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); } -/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to /// 0x8000. @@ -300,7 +301,7 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to /// 0x8000. @@ -319,13 +320,13 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadds_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 +/// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [8 x i16]. /// /// \headerfile @@ -348,7 +349,7 @@ _mm_hsub_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 +/// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [4 x i32]. /// /// \headerfile @@ -371,7 +372,7 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 +/// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [4 x i16]. /// /// \headerfile @@ -388,13 +389,13 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 +/// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [2 x i32]. /// /// \headerfile @@ -411,13 +412,13 @@ _mm_hsub_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 +/// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are /// saturated to 0x8000. 
@@ -442,7 +443,7 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 +/// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are /// saturated to 0x8000. @@ -461,13 +462,13 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsubs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); } -/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer +/// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, adds pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to @@ -501,7 +502,7 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); } -/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer +/// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, adds pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to @@ -525,13 +526,13 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b) /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_maddubs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); } -/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit +/// Multiplies packed 16-bit signed integer values, truncates the 32-bit /// products to the 18 most significant bits by right-shifting, rounds the /// truncated value by adding 1, and writes bits [16:1] to the destination. /// @@ -551,7 +552,7 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } -/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit +/// Multiplies packed 16-bit signed integer values, truncates the 32-bit /// products to the 18 most significant bits by right-shifting, rounds the /// truncated value by adding 1, and writes bits [16:1] to the destination. /// @@ -565,13 +566,13 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. 
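The multiply-and-add behavior of `_mm_maddubs_epi16` documented above (unsigned bytes from the first operand, signed bytes from the second, adjacent products summed with signed saturation) can be seen in this minimal, hypothetical sketch, again assuming an SSSE3 target with `-mssse3`:

```c
/* Hypothetical demo, not part of the shipped headers. Build: cc -mssse3 demo.c */
#include <stdio.h>
#include <tmmintrin.h>

int main(void) {
    /* The first operand is read as unsigned bytes, the second as signed bytes. */
    __m128i u = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8,
                              9, 10, 11, 12, 13, 14, 15, 16);
    __m128i s = _mm_setr_epi8(1, -1, 2, -2, 3, -3, 4, -4,
                              5, -5, 6, -6, 7, -7, 8, -8);

    /* out[i] = saturate16(u[2i]*s[2i] + u[2i+1]*s[2i+1]);
       e.g. out[0] = 1*1 + 2*(-1) = -1. */
    __m128i r = _mm_maddubs_epi16(u, s);

    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]); /* -1 -2 -3 ... -8 */
    putchar('\n');
    return 0;
}
```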
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhrs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); } -/// \brief Copies the 8-bit integers from a 128-bit integer vector to the +/// Copies the 8-bit integers from a 128-bit integer vector to the /// destination or clears 8-bit values in the destination, as specified by /// the second source operand. /// @@ -597,7 +598,7 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); } -/// \brief Copies the 8-bit integers from a 64-bit integer vector to the +/// Copies the 8-bit integers from a 64-bit integer vector to the /// destination or clears 8-bit values in the destination, as specified by /// the second source operand. /// @@ -616,13 +617,13 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b) /// destination. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 64-bit integer vector containing the copied or cleared values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_shuffle_pi8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); } -/// \brief For each 8-bit integer in the first source operand, perform one of +/// For each 8-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the byte in the second source is negative, calculate the two's @@ -648,7 +649,7 @@ _mm_sign_epi8(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); } -/// \brief For each 16-bit integer in the first source operand, perform one of +/// For each 16-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the word in the second source is negative, calculate the two's @@ -674,7 +675,7 @@ _mm_sign_epi16(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); } -/// \brief For each 32-bit integer in the first source operand, perform one of +/// For each 32-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the doubleword in the second source is negative, calculate the two's @@ -700,7 +701,7 @@ _mm_sign_epi32(__m128i __a, __m128i __b) return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); } -/// \brief For each 8-bit integer in the first source operand, perform one of +/// For each 8-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the byte in the second source is negative, calculate the two's @@ -720,13 +721,13 @@ _mm_sign_epi32(__m128i __a, __m128i __b) /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); } -/// \brief For each 16-bit integer in the first source operand, perform one of +/// For each 16-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. 
/// /// If the word in the second source is negative, calculate the two's @@ -746,13 +747,13 @@ _mm_sign_pi8(__m64 __a, __m64 __b) /// A 64-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); } -/// \brief For each 32-bit integer in the first source operand, perform one of +/// For each 32-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the doubleword in the second source is negative, calculate the two's @@ -772,12 +773,13 @@ _mm_sign_pi16(__m64 __a, __m64 __b) /// A 64-bit integer vector containing two control doublewords corresponding /// to positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_MMX #endif /* __TMMINTRIN_H */ diff --git a/lib/clang/7.0.0/include/unwind.h b/lib/clang/8.0.0/include/unwind.h similarity index 99% rename from lib/clang/7.0.0/include/unwind.h rename to lib/clang/8.0.0/include/unwind.h index 345fa4d..0e8317e 100644 --- a/lib/clang/7.0.0/include/unwind.h +++ b/lib/clang/8.0.0/include/unwind.h @@ -154,8 +154,12 @@ struct _Unwind_Control_Block { struct _Unwind_Exception { _Unwind_Exception_Class exception_class; _Unwind_Exception_Cleanup_Fn exception_cleanup; +#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__) + _Unwind_Word private_[6]; +#else _Unwind_Word private_1; _Unwind_Word private_2; +#endif /* The Itanium ABI requires that _Unwind_Exception objects are "double-word * aligned". GCC has interpreted this to mean "use the maximum useful * alignment for the target"; so do we. */ diff --git a/lib/clang/7.0.0/include/vadefs.h b/lib/clang/8.0.0/include/vadefs.h similarity index 100% rename from lib/clang/7.0.0/include/vadefs.h rename to lib/clang/8.0.0/include/vadefs.h diff --git a/lib/clang/7.0.0/include/vaesintrin.h b/lib/clang/8.0.0/include/vaesintrin.h similarity index 96% rename from lib/clang/7.0.0/include/vaesintrin.h rename to lib/clang/8.0.0/include/vaesintrin.h index efbb8a5..e4174bb 100644 --- a/lib/clang/7.0.0/include/vaesintrin.h +++ b/lib/clang/8.0.0/include/vaesintrin.h @@ -29,10 +29,10 @@ #define __VAESINTRIN_H /* Default attributes for YMM forms. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256))) /* Default attributes for ZMM forms. 
 */
-#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes")))
+#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512)))
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
diff --git a/lib/clang/7.0.0/include/varargs.h b/lib/clang/8.0.0/include/varargs.h
similarity index 100%
rename from lib/clang/7.0.0/include/varargs.h
rename to lib/clang/8.0.0/include/varargs.h
diff --git a/lib/clang/7.0.0/include/vecintrin.h b/lib/clang/8.0.0/include/vecintrin.h
similarity index 100%
rename from lib/clang/7.0.0/include/vecintrin.h
rename to lib/clang/8.0.0/include/vecintrin.h
diff --git a/lib/clang/7.0.0/include/vpclmulqdqintrin.h b/lib/clang/8.0.0/include/vpclmulqdqintrin.h
similarity index 86%
rename from lib/clang/7.0.0/include/vpclmulqdqintrin.h
rename to lib/clang/8.0.0/include/vpclmulqdqintrin.h
index 21cda22..86174a4 100644
--- a/lib/clang/7.0.0/include/vpclmulqdqintrin.h
+++ b/lib/clang/8.0.0/include/vpclmulqdqintrin.h
@@ -28,15 +28,15 @@
 #ifndef __VPCLMULQDQINTRIN_H
 #define __VPCLMULQDQINTRIN_H
 
-#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({ \
+#define _mm256_clmulepi64_epi128(A, B, I) \
   (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
                                        (__v4di)(__m256i)(B), \
-                                       (char)(I)); })
+                                       (char)(I))
 
-#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({ \
+#define _mm512_clmulepi64_epi128(A, B, I) \
   (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
                                        (__v8di)(__m512i)(B), \
-                                       (char)(I)); })
+                                       (char)(I))
 
-#endif // __VPCLMULQDQINTRIN_H
+#endif /* __VPCLMULQDQINTRIN_H */
diff --git a/lib/clang/8.0.0/include/waitpkgintrin.h b/lib/clang/8.0.0/include/waitpkgintrin.h
new file mode 100644
index 0000000..e29d6cf
--- /dev/null
+++ b/lib/clang/8.0.0/include/waitpkgintrin.h
@@ -0,0 +1,56 @@
+/*===----------------------- waitpkgintrin.h - WAITPKG --------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
+#error "Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __WAITPKGINTRIN_H
+#define __WAITPKGINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS \
+  __attribute__((__always_inline__, __nodebug__, __target__("waitpkg")))
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_umonitor (void * __address)
+{
+  __builtin_ia32_umonitor (__address);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_umwait (unsigned int __control, unsigned long long __counter)
+{
+  return __builtin_ia32_umwait (__control,
+    (unsigned int)(__counter >> 32), (unsigned int)__counter);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_tpause (unsigned int __control, unsigned long long __counter)
+{
+  return __builtin_ia32_tpause (__control,
+    (unsigned int)(__counter >> 32), (unsigned int)__counter);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __WAITPKGINTRIN_H */
diff --git a/lib/clang/7.0.0/include/wbnoinvdintrin.h b/lib/clang/8.0.0/include/wbnoinvdintrin.h
similarity index 96%
rename from lib/clang/7.0.0/include/wbnoinvdintrin.h
rename to lib/clang/8.0.0/include/wbnoinvdintrin.h
index 0852dc1..cad8336 100644
--- a/lib/clang/7.0.0/include/wbnoinvdintrin.h
+++ b/lib/clang/8.0.0/include/wbnoinvdintrin.h
@@ -21,7 +21,7 @@
  *===-----------------------------------------------------------------------===
  */
 
-#ifndef __X86INTRIN_H
+#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
 #error "Never use <wbnoinvdintrin.h> directly; include <x86intrin.h> instead."
 #endif
 
diff --git a/lib/clang/7.0.0/include/wmmintrin.h b/lib/clang/8.0.0/include/wmmintrin.h
similarity index 94%
rename from lib/clang/7.0.0/include/wmmintrin.h
rename to lib/clang/8.0.0/include/wmmintrin.h
index a2d9310..569a8d8 100644
--- a/lib/clang/7.0.0/include/wmmintrin.h
+++ b/lib/clang/8.0.0/include/wmmintrin.h
@@ -21,8 +21,8 @@
  *===-----------------------------------------------------------------------===
  */
 
-#ifndef _WMMINTRIN_H
-#define _WMMINTRIN_H
+#ifndef __WMMINTRIN_H
+#define __WMMINTRIN_H
 
 #include <emmintrin.h>
 
@@ -30,4 +30,4 @@
 
 #include <__wmmintrin_pclmul.h>
 
-#endif /* _WMMINTRIN_H */
+#endif /* __WMMINTRIN_H */
diff --git a/lib/clang/7.0.0/include/x86intrin.h b/lib/clang/8.0.0/include/x86intrin.h
similarity index 75%
rename from lib/clang/7.0.0/include/x86intrin.h
rename to lib/clang/8.0.0/include/x86intrin.h
index 53151f7..728c58c 100644
--- a/lib/clang/7.0.0/include/x86intrin.h
+++ b/lib/clang/8.0.0/include/x86intrin.h
@@ -32,26 +32,6 @@
 #include <mm3dnow.h>
 #endif
 
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
-#include <bmiintrin.h>
-#endif
-
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
-#include <bmi2intrin.h>
-#endif
-
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
-#include <lzcntintrin.h>
-#endif
-
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
-#include <popcntintrin.h>
-#endif
-
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
-#include <rdseedintrin.h>
-#endif
-
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)
 #include <prfchwintrin.h>
 #endif
@@ -76,10 +56,6 @@
 #include <tbmintrin.h>
 #endif
 
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
-#include <f16cintrin.h>
-#endif
-
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)
 #include <mwaitxintrin.h>
 #endif
@@ -88,8 +64,5 @@
 #include <clzerointrin.h>
 #endif
 
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
-#include <wbnoinvdintrin.h>
-#endif
 
 #endif /* __X86INTRIN_H */
diff --git a/lib/clang/7.0.0/include/xmmintrin.h b/lib/clang/8.0.0/include/xmmintrin.h
similarity index 88%
rename from lib/clang/7.0.0/include/xmmintrin.h
rename to lib/clang/8.0.0/include/xmmintrin.h
index 5e8901c..17af172 100644
--- a/lib/clang/7.0.0/include/xmmintrin.h
+++ b/lib/clang/8.0.0/include/xmmintrin.h
@@ -40,9 +40,10 @@ typedef
unsigned int __v4su __attribute__((__vector_size__(16))); #endif /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"), __min_vector_width__(64))) -/// \brief Adds the 32-bit float values in the low-order bits of the operands. +/// Adds the 32-bit float values in the low-order bits of the operands. /// /// \headerfile /// @@ -64,7 +65,7 @@ _mm_add_ss(__m128 __a, __m128 __b) return __a; } -/// \brief Adds two 128-bit vectors of [4 x float], and returns the results of +/// Adds two 128-bit vectors of [4 x float], and returns the results of /// the addition. /// /// \headerfile @@ -83,7 +84,7 @@ _mm_add_ps(__m128 __a, __m128 __b) return (__m128)((__v4sf)__a + (__v4sf)__b); } -/// \brief Subtracts the 32-bit float value in the low-order bits of the second +/// Subtracts the 32-bit float value in the low-order bits of the second /// operand from the corresponding value in the first operand. /// /// \headerfile @@ -106,7 +107,7 @@ _mm_sub_ss(__m128 __a, __m128 __b) return __a; } -/// \brief Subtracts each of the values of the second operand from the first +/// Subtracts each of the values of the second operand from the first /// operand, both of which are 128-bit vectors of [4 x float] and returns /// the results of the subtraction. /// @@ -126,7 +127,7 @@ _mm_sub_ps(__m128 __a, __m128 __b) return (__m128)((__v4sf)__a - (__v4sf)__b); } -/// \brief Multiplies two 32-bit float values in the low-order bits of the +/// Multiplies two 32-bit float values in the low-order bits of the /// operands. /// /// \headerfile @@ -149,7 +150,7 @@ _mm_mul_ss(__m128 __a, __m128 __b) return __a; } -/// \brief Multiplies two 128-bit vectors of [4 x float] and returns the +/// Multiplies two 128-bit vectors of [4 x float] and returns the /// results of the multiplication. /// /// \headerfile @@ -168,7 +169,7 @@ _mm_mul_ps(__m128 __a, __m128 __b) return (__m128)((__v4sf)__a * (__v4sf)__b); } -/// \brief Divides the value in the low-order 32 bits of the first operand by +/// Divides the value in the low-order 32 bits of the first operand by /// the corresponding value in the second operand. /// /// \headerfile @@ -191,7 +192,7 @@ _mm_div_ss(__m128 __a, __m128 __b) return __a; } -/// \brief Divides two 128-bit vectors of [4 x float]. +/// Divides two 128-bit vectors of [4 x float]. /// /// \headerfile /// @@ -209,7 +210,7 @@ _mm_div_ps(__m128 __a, __m128 __b) return (__m128)((__v4sf)__a / (__v4sf)__b); } -/// \brief Calculates the square root of the value stored in the low-order bits +/// Calculates the square root of the value stored in the low-order bits /// of a 128-bit vector of [4 x float]. /// /// \headerfile @@ -224,11 +225,10 @@ _mm_div_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) { - __m128 __c = __builtin_ia32_sqrtss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return (__m128)__builtin_ia32_sqrtss((__v4sf)__a); } -/// \brief Calculates the square roots of the values stored in a 128-bit vector +/// Calculates the square roots of the values stored in a 128-bit vector /// of [4 x float]. 
/// /// \headerfile @@ -245,7 +245,7 @@ _mm_sqrt_ps(__m128 __a) return __builtin_ia32_sqrtps((__v4sf)__a); } -/// \brief Calculates the approximate reciprocal of the value stored in the +/// Calculates the approximate reciprocal of the value stored in the /// low-order bits of a 128-bit vector of [4 x float]. /// /// \headerfile @@ -260,11 +260,10 @@ _mm_sqrt_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a) { - __m128 __c = __builtin_ia32_rcpss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return (__m128)__builtin_ia32_rcpss((__v4sf)__a); } -/// \brief Calculates the approximate reciprocals of the values stored in a +/// Calculates the approximate reciprocals of the values stored in a /// 128-bit vector of [4 x float]. /// /// \headerfile @@ -278,10 +277,10 @@ _mm_rcp_ss(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a) { - return __builtin_ia32_rcpps((__v4sf)__a); + return (__m128)__builtin_ia32_rcpps((__v4sf)__a); } -/// \brief Calculates the approximate reciprocal of the square root of the value +/// Calculates the approximate reciprocal of the square root of the value /// stored in the low-order bits of a 128-bit vector of [4 x float]. /// /// \headerfile @@ -297,11 +296,10 @@ _mm_rcp_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a) { - __m128 __c = __builtin_ia32_rsqrtss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return __builtin_ia32_rsqrtss((__v4sf)__a); } -/// \brief Calculates the approximate reciprocals of the square roots of the +/// Calculates the approximate reciprocals of the square roots of the /// values stored in a 128-bit vector of [4 x float]. /// /// \headerfile @@ -318,7 +316,7 @@ _mm_rsqrt_ps(__m128 __a) return __builtin_ia32_rsqrtps((__v4sf)__a); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands and returns the lesser value in the low-order bits of the /// vector of [4 x float]. /// @@ -341,7 +339,7 @@ _mm_min_ss(__m128 __a, __m128 __b) return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 128-bit vectors of [4 x float] and returns the lesser +/// Compares two 128-bit vectors of [4 x float] and returns the lesser /// of each pair of values. /// /// \headerfile @@ -360,7 +358,7 @@ _mm_min_ps(__m128 __a, __m128 __b) return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands and returns the greater value in the low-order bits of a 128-bit /// vector of [4 x float]. /// @@ -383,7 +381,7 @@ _mm_max_ss(__m128 __a, __m128 __b) return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 128-bit vectors of [4 x float] and returns the greater +/// Compares two 128-bit vectors of [4 x float] and returns the greater /// of each pair of values. /// /// \headerfile @@ -402,7 +400,7 @@ _mm_max_ps(__m128 __a, __m128 __b) return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b); } -/// \brief Performs a bitwise AND of two 128-bit vectors of [4 x float]. +/// Performs a bitwise AND of two 128-bit vectors of [4 x float]. 
/// /// \headerfile /// @@ -420,7 +418,7 @@ _mm_and_ps(__m128 __a, __m128 __b) return (__m128)((__v4su)__a & (__v4su)__b); } -/// \brief Performs a bitwise AND of two 128-bit vectors of [4 x float], using +/// Performs a bitwise AND of two 128-bit vectors of [4 x float], using /// the one's complement of the values contained in the first source /// operand. /// @@ -442,7 +440,7 @@ _mm_andnot_ps(__m128 __a, __m128 __b) return (__m128)(~(__v4su)__a & (__v4su)__b); } -/// \brief Performs a bitwise OR of two 128-bit vectors of [4 x float]. +/// Performs a bitwise OR of two 128-bit vectors of [4 x float]. /// /// \headerfile /// @@ -460,7 +458,7 @@ _mm_or_ps(__m128 __a, __m128 __b) return (__m128)((__v4su)__a | (__v4su)__b); } -/// \brief Performs a bitwise exclusive OR of two 128-bit vectors of +/// Performs a bitwise exclusive OR of two 128-bit vectors of /// [4 x float]. /// /// \headerfile @@ -479,7 +477,7 @@ _mm_xor_ps(__m128 __a, __m128 __b) return (__m128)((__v4su)__a ^ (__v4su)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands for equality and returns the result of the comparison in the /// low-order bits of a vector [4 x float]. /// @@ -501,7 +499,7 @@ _mm_cmpeq_ss(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] for equality. /// /// \headerfile @@ -519,7 +517,7 @@ _mm_cmpeq_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is less than the /// corresponding value in the second operand and returns the result of the /// comparison in the low-order bits of a vector of [4 x float]. @@ -542,7 +540,7 @@ _mm_cmplt_ss(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are less than those in the second operand. /// @@ -561,7 +559,7 @@ _mm_cmplt_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is less than or /// equal to the corresponding value in the second operand and returns the /// result of the comparison in the low-order bits of a vector of @@ -585,7 +583,7 @@ _mm_cmple_ss(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are less than or equal to those in the second operand. 
/// @@ -604,7 +602,7 @@ _mm_cmple_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is greater than /// the corresponding value in the second operand and returns the result of /// the comparison in the low-order bits of a vector of [4 x float]. @@ -629,7 +627,7 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b) 4, 1, 2, 3); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are greater than those in the second operand. /// @@ -648,7 +646,7 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is greater than /// or equal to the corresponding value in the second operand and returns /// the result of the comparison in the low-order bits of a vector of @@ -674,7 +672,7 @@ _mm_cmpge_ss(__m128 __a, __m128 __b) 4, 1, 2, 3); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are greater than or equal to those in the second operand. /// @@ -693,7 +691,7 @@ _mm_cmpge_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands for inequality and returns the result of the comparison in the /// low-order bits of a vector of [4 x float]. /// @@ -716,7 +714,7 @@ _mm_cmpneq_ss(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] for inequality. /// /// \headerfile @@ -735,7 +733,7 @@ _mm_cmpneq_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not less than /// the corresponding value in the second operand and returns the result of /// the comparison in the low-order bits of a vector of [4 x float]. @@ -759,7 +757,7 @@ _mm_cmpnlt_ss(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not less than those in the second operand. 
/// @@ -779,7 +777,7 @@ _mm_cmpnlt_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not less than /// or equal to the corresponding value in the second operand and returns /// the result of the comparison in the low-order bits of a vector of @@ -804,7 +802,7 @@ _mm_cmpnle_ss(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not less than or equal to those in the second operand. /// @@ -824,7 +822,7 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not greater /// than the corresponding value in the second operand and returns the /// result of the comparison in the low-order bits of a vector of @@ -851,7 +849,7 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b) 4, 1, 2, 3); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not greater than those in the second operand. /// @@ -871,7 +869,7 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not greater /// than or equal to the corresponding value in the second operand and /// returns the result of the comparison in the low-order bits of a vector @@ -898,7 +896,7 @@ _mm_cmpnge_ss(__m128 __a, __m128 __b) 4, 1, 2, 3); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not greater than or equal to those in the second operand. /// @@ -918,7 +916,7 @@ _mm_cmpnge_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is ordered with /// respect to the corresponding value in the second operand and returns the /// result of the comparison in the low-order bits of a vector of @@ -943,7 +941,7 @@ _mm_cmpord_ss(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are ordered with respect to those in the second operand. 
/// @@ -963,7 +961,7 @@ _mm_cmpord_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is unordered /// with respect to the corresponding value in the second operand and /// returns the result of the comparison in the low-order bits of a vector @@ -988,7 +986,7 @@ _mm_cmpunord_ss(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares each of the corresponding 32-bit float values of the +/// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are unordered with respect to those in the second operand. /// @@ -1008,7 +1006,7 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b) return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands for equality and returns the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. @@ -1032,7 +1030,7 @@ _mm_comieq_ss(__m128 __a, __m128 __b) return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is less than the second /// operand and returns the result of the comparison. /// @@ -1057,7 +1055,7 @@ _mm_comilt_ss(__m128 __a, __m128 __b) return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is less than or equal to the /// second operand and returns the result of the comparison. /// @@ -1081,7 +1079,7 @@ _mm_comile_ss(__m128 __a, __m128 __b) return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is greater than the second /// operand and returns the result of the comparison. /// @@ -1105,7 +1103,7 @@ _mm_comigt_ss(__m128 __a, __m128 __b) return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is greater than or equal to /// the second operand and returns the result of the comparison. /// @@ -1129,7 +1127,7 @@ _mm_comige_ss(__m128 __a, __m128 __b) return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b); } -/// \brief Compares two 32-bit float values in the low-order bits of both +/// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is not equal to the second /// operand and returns the result of the comparison. 
/// @@ -1153,7 +1151,7 @@ _mm_comineq_ss(__m128 __a, __m128 __b) return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b); } -/// \brief Performs an unordered comparison of two 32-bit float values using +/// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine equality and returns /// the result of the comparison. /// @@ -1177,7 +1175,7 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b) return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b); } -/// \brief Performs an unordered comparison of two 32-bit float values using +/// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// less than the second operand and returns the result of the comparison. /// @@ -1201,7 +1199,7 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b) return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b); } -/// \brief Performs an unordered comparison of two 32-bit float values using +/// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// less than or equal to the second operand and returns the result of the /// comparison. @@ -1226,7 +1224,7 @@ _mm_ucomile_ss(__m128 __a, __m128 __b) return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b); } -/// \brief Performs an unordered comparison of two 32-bit float values using +/// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// greater than the second operand and returns the result of the /// comparison. @@ -1251,7 +1249,7 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b) return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b); } -/// \brief Performs an unordered comparison of two 32-bit float values using +/// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// greater than or equal to the second operand and returns the result of /// the comparison. @@ -1276,7 +1274,7 @@ _mm_ucomige_ss(__m128 __a, __m128 __b) return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b); } -/// \brief Performs an unordered comparison of two 32-bit float values using +/// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine inequality and returns /// the result of the comparison. /// @@ -1300,7 +1298,7 @@ _mm_ucomineq_ss(__m128 __a, __m128 __b) return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b); } -/// \brief Converts a float value contained in the lower 32 bits of a vector of +/// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer. /// /// \headerfile @@ -1318,7 +1316,7 @@ _mm_cvtss_si32(__m128 __a) return __builtin_ia32_cvtss2si((__v4sf)__a); } -/// \brief Converts a float value contained in the lower 32 bits of a vector of +/// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer. /// /// \headerfile @@ -1338,7 +1336,7 @@ _mm_cvt_ss2si(__m128 __a) #ifdef __x86_64__ -/// \brief Converts a float value contained in the lower 32 bits of a vector of +/// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 64-bit integer. 
/// /// \headerfile @@ -1358,7 +1356,7 @@ _mm_cvtss_si64(__m128 __a) #endif -/// \brief Converts two low-order float values in a 128-bit vector of +/// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32]. /// /// \headerfile @@ -1368,13 +1366,13 @@ _mm_cvtss_si64(__m128 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi32(__m128 __a) { return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a); } -/// \brief Converts two low-order float values in a 128-bit vector of +/// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32]. /// /// \headerfile @@ -1384,13 +1382,13 @@ _mm_cvtps_pi32(__m128 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvt_ps2pi(__m128 __a) { return _mm_cvtps_pi32(__a); } -/// \brief Converts a float value contained in the lower 32 bits of a vector of +/// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer, truncating the result when it is /// inexact. /// @@ -1409,7 +1407,7 @@ _mm_cvttss_si32(__m128 __a) return __builtin_ia32_cvttss2si((__v4sf)__a); } -/// \brief Converts a float value contained in the lower 32 bits of a vector of +/// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer, truncating the result when it is /// inexact. /// @@ -1429,7 +1427,7 @@ _mm_cvtt_ss2si(__m128 __a) } #ifdef __x86_64__ -/// \brief Converts a float value contained in the lower 32 bits of a vector of +/// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 64-bit integer, truncating the result when it is /// inexact. /// @@ -1449,7 +1447,7 @@ _mm_cvttss_si64(__m128 __a) } #endif -/// \brief Converts two low-order float values in a 128-bit vector of +/// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32], truncating the result /// when it is inexact. /// @@ -1461,13 +1459,13 @@ _mm_cvttss_si64(__m128 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttps_pi32(__m128 __a) { return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a); } -/// \brief Converts two low-order float values in a 128-bit vector of [4 x +/// Converts two low-order float values in a 128-bit vector of [4 x /// float] into a 64-bit vector of [2 x i32], truncating the result when it /// is inexact. /// @@ -1478,13 +1476,13 @@ _mm_cvttps_pi32(__m128 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtt_ps2pi(__m128 __a) { return _mm_cvttps_pi32(__a); } -/// \brief Converts a 32-bit signed integer value into a floating point value +/// Converts a 32-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. 
The remaining /// higher order elements of the destination vector are copied from the /// corresponding elements in the first operand. @@ -1507,7 +1505,7 @@ _mm_cvtsi32_ss(__m128 __a, int __b) return __a; } -/// \brief Converts a 32-bit signed integer value into a floating point value +/// Converts a 32-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. The remaining /// higher order elements of the destination are copied from the /// corresponding elements in the first operand. @@ -1531,7 +1529,7 @@ _mm_cvt_si2ss(__m128 __a, int __b) #ifdef __x86_64__ -/// \brief Converts a 64-bit signed integer value into a floating point value +/// Converts a 64-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. The remaining /// higher order elements of the destination are copied from the /// corresponding elements in the first operand. @@ -1556,7 +1554,7 @@ _mm_cvtsi64_ss(__m128 __a, long long __b) #endif -/// \brief Converts two elements of a 64-bit vector of [2 x i32] into two +/// Converts two elements of a 64-bit vector of [2 x i32] into two /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. @@ -1573,13 +1571,13 @@ _mm_cvtsi64_ss(__m128 __a, long long __b) /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value of the second operand. The upper 64 bits are copied from /// the upper 64 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_ps(__m128 __a, __m64 __b) { return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b); } -/// \brief Converts two elements of a 64-bit vector of [2 x i32] into two +/// Converts two elements of a 64-bit vector of [2 x i32] into two /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. @@ -1596,13 +1594,13 @@ _mm_cvtpi32_ps(__m128 __a, __m64 __b) /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value from the second operand. The upper 64 bits are copied /// from the upper 64 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvt_pi2ps(__m128 __a, __m64 __b) { return _mm_cvtpi32_ps(__a, __b); } -/// \brief Extracts a float value contained in the lower 32 bits of a vector of +/// Extracts a float value contained in the lower 32 bits of a vector of /// [4 x float]. /// /// \headerfile @@ -1619,7 +1617,7 @@ _mm_cvtss_f32(__m128 __a) return __a[0]; } -/// \brief Loads two packed float values from the address \a __p into the +/// Loads two packed float values from the address \a __p into the /// high-order bits of a 128-bit vector of [4 x float]. The low-order bits /// are copied from the low-order bits of the first operand. /// @@ -1646,7 +1644,7 @@ _mm_loadh_pi(__m128 __a, const __m64 *__p) return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); } -/// \brief Loads two packed float values from the address \a __p into the +/// Loads two packed float values from the address \a __p into the /// low-order bits of a 128-bit vector of [4 x float]. 
The high-order bits /// are copied from the high-order bits of the first operand. /// @@ -1673,7 +1671,7 @@ _mm_loadl_pi(__m128 __a, const __m64 *__p) return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); } -/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower +/// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits of the vector are initialized with the single-precision /// floating-point value loaded from a specified memory location. The upper /// 96 bits are set to zero. @@ -1695,10 +1693,10 @@ _mm_load_ss(const float *__p) float __u; } __attribute__((__packed__, __may_alias__)); float __u = ((struct __mm_load_ss_struct*)__p)->__u; - return (__m128){ __u, 0, 0, 0 }; + return __extension__ (__m128){ __u, 0, 0, 0 }; } -/// \brief Loads a 32-bit float value and duplicates it to all four vector +/// Loads a 32-bit float value and duplicates it to all four vector /// elements of a 128-bit vector of [4 x float]. /// /// \headerfile @@ -1717,12 +1715,12 @@ _mm_load1_ps(const float *__p) float __u; } __attribute__((__packed__, __may_alias__)); float __u = ((struct __mm_load1_ps_struct*)__p)->__u; - return (__m128){ __u, __u, __u, __u }; + return __extension__ (__m128){ __u, __u, __u, __u }; } #define _mm_load_ps1(p) _mm_load1_ps(p) -/// \brief Loads a 128-bit floating-point vector of [4 x float] from an aligned +/// Loads a 128-bit floating-point vector of [4 x float] from an aligned /// memory location. /// /// \headerfile @@ -1739,7 +1737,7 @@ _mm_load_ps(const float *__p) return *(__m128*)__p; } -/// \brief Loads a 128-bit floating-point vector of [4 x float] from an +/// Loads a 128-bit floating-point vector of [4 x float] from an /// unaligned memory location. /// /// \headerfile @@ -1759,7 +1757,7 @@ _mm_loadu_ps(const float *__p) return ((struct __loadu_ps*)__p)->__v; } -/// \brief Loads four packed float values, in reverse order, from an aligned +/// Loads four packed float values, in reverse order, from an aligned /// memory location to 32-bit elements in a 128-bit vector of [4 x float]. /// /// \headerfile @@ -1779,7 +1777,7 @@ _mm_loadr_ps(const float *__p) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0); } -/// \brief Create a 128-bit vector of [4 x float] with undefined values. +/// Create a 128-bit vector of [4 x float] with undefined values. /// /// \headerfile /// @@ -1792,7 +1790,7 @@ _mm_undefined_ps(void) return (__m128)__builtin_ia32_undef128(); } -/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower +/// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits of the vector are initialized with the specified single-precision /// floating-point value. The upper 96 bits are set to zero. /// @@ -1809,10 +1807,10 @@ _mm_undefined_ps(void) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ss(float __w) { - return (__m128){ __w, 0, 0, 0 }; + return __extension__ (__m128){ __w, 0, 0, 0 }; } -/// \brief Constructs a 128-bit floating-point vector of [4 x float], with each +/// Constructs a 128-bit floating-point vector of [4 x float], with each /// of the four single-precision floating-point vector elements set to the /// specified single-precision floating-point value. /// @@ -1827,11 +1825,11 @@ _mm_set_ss(float __w) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set1_ps(float __w) { - return (__m128){ __w, __w, __w, __w }; + return __extension__ (__m128){ __w, __w, __w, __w }; } /* Microsoft specific. 
*/ -/// \brief Constructs a 128-bit floating-point vector of [4 x float], with each +/// Constructs a 128-bit floating-point vector of [4 x float], with each /// of the four single-precision floating-point vector elements set to the /// specified single-precision floating-point value. /// @@ -1849,7 +1847,7 @@ _mm_set_ps1(float __w) return _mm_set1_ps(__w); } -/// \brief Constructs a 128-bit floating-point vector of [4 x float] +/// Constructs a 128-bit floating-point vector of [4 x float] /// initialized with the specified single-precision floating-point values. /// /// \headerfile @@ -1873,10 +1871,10 @@ _mm_set_ps1(float __w) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps(float __z, float __y, float __x, float __w) { - return (__m128){ __w, __x, __y, __z }; + return __extension__ (__m128){ __w, __x, __y, __z }; } -/// \brief Constructs a 128-bit floating-point vector of [4 x float], +/// Constructs a 128-bit floating-point vector of [4 x float], /// initialized in reverse order with the specified 32-bit single-precision /// float-point values. /// @@ -1901,10 +1899,10 @@ _mm_set_ps(float __z, float __y, float __x, float __w) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setr_ps(float __z, float __y, float __x, float __w) { - return (__m128){ __z, __y, __x, __w }; + return __extension__ (__m128){ __z, __y, __x, __w }; } -/// \brief Constructs a 128-bit floating-point vector of [4 x float] initialized +/// Constructs a 128-bit floating-point vector of [4 x float] initialized /// to zero. /// /// \headerfile @@ -1916,10 +1914,10 @@ _mm_setr_ps(float __z, float __y, float __x, float __w) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void) { - return (__m128){ 0, 0, 0, 0 }; + return __extension__ (__m128){ 0, 0, 0, 0 }; } -/// \brief Stores the upper 64 bits of a 128-bit vector of [4 x float] to a +/// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile @@ -1936,7 +1934,7 @@ _mm_storeh_pi(__m64 *__p, __m128 __a) __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a); } -/// \brief Stores the lower 64 bits of a 128-bit vector of [4 x float] to a +/// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile @@ -1953,7 +1951,7 @@ _mm_storel_pi(__m64 *__p, __m128 __a) __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a); } -/// \brief Stores the lower 32 bits of a 128-bit vector of [4 x float] to a +/// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile @@ -1973,7 +1971,7 @@ _mm_store_ss(float *__p, __m128 __a) ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; } -/// \brief Stores a 128-bit vector of [4 x float] to an unaligned memory +/// Stores a 128-bit vector of [4 x float] to an unaligned memory /// location. /// /// \headerfile @@ -1994,7 +1992,7 @@ _mm_storeu_ps(float *__p, __m128 __a) ((struct __storeu_ps*)__p)->__v = __a; } -/// \brief Stores a 128-bit vector of [4 x float] into an aligned memory +/// Stores a 128-bit vector of [4 x float] into an aligned memory /// location. /// /// \headerfile @@ -2012,7 +2010,7 @@ _mm_store_ps(float *__p, __m128 __a) *(__m128*)__p = __a; } -/// \brief Stores the lower 32 bits of a 128-bit vector of [4 x float] into +/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into /// four contiguous elements in an aligned memory location. 
/// /// \headerfile @@ -2032,7 +2030,7 @@ _mm_store1_ps(float *__p, __m128 __a) _mm_store_ps(__p, __a); } -/// \brief Stores the lower 32 bits of a 128-bit vector of [4 x float] into +/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into /// four contiguous elements in an aligned memory location. /// /// \headerfile @@ -2048,10 +2046,10 @@ _mm_store1_ps(float *__p, __m128 __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps1(float *__p, __m128 __a) { - return _mm_store1_ps(__p, __a); + _mm_store1_ps(__p, __a); } -/// \brief Stores float values from a 128-bit vector of [4 x float] to an +/// Stores float values from a 128-bit vector of [4 x float] to an /// aligned memory location in reverse order. /// /// \headerfile @@ -2082,7 +2080,7 @@ _mm_storer_ps(float *__p, __m128 __a) /* FIXME: We have to #define this because "sel" must be a constant integer, and Sema doesn't do any form of constant propagation yet. */ -/// \brief Loads one cache line of data from the specified address to a location +/// Loads one cache line of data from the specified address to a location /// closer to the processor. /// /// \headerfile @@ -2110,7 +2108,7 @@ _mm_storer_ps(float *__p, __m128 __a) ((sel) >> 2) & 1, (sel) & 0x3)) #endif -/// \brief Stores a 64-bit integer in the specified aligned memory location. To +/// Stores a 64-bit integer in the specified aligned memory location. To /// minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// @@ -2122,13 +2120,13 @@ _mm_storer_ps(float *__p, __m128 __a) /// A pointer to an aligned memory location used to store the register value. /// \param __a /// A 64-bit integer containing the value to be stored. -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_stream_pi(__m64 *__p, __m64 __a) { __builtin_ia32_movntq(__p, __a); } -/// \brief Moves packed float values from a 128-bit vector of [4 x float] to a +/// Moves packed float values from a 128-bit vector of [4 x float] to a /// 128-bit aligned memory location. To minimize caching, the data is flagged /// as non-temporal (unlikely to be used again soon). /// @@ -2151,7 +2149,7 @@ _mm_stream_ps(float *__p, __m128 __a) extern "C" { #endif -/// \brief Forces strong memory ordering (serialization) between store +/// Forces strong memory ordering (serialization) between store /// instructions preceding this instruction and store instructions following /// this instruction, ensuring the system completes all previous stores /// before executing subsequent stores. @@ -2166,7 +2164,7 @@ void _mm_sfence(void); } // extern "C" #endif -/// \brief Extracts 16-bit element from a 64-bit vector of [4 x i16] and +/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and /// returns it, as specified by the immediate integer operand. /// /// \headerfile @@ -2186,10 +2184,10 @@ void _mm_sfence(void); /// 2: Bits [47:32] are copied to the destination. \n /// 3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. 
-/// \brief Extracts a 16-bit element from a 64-bit vector of [4 x i16] and +/// Extracts a 16-bit element from a 64-bit vector of [4 x i16] and /// returns it, as specified by the immediate integer operand. /// /// \headerfile <x86intrin.h> /// @@ -2186,10 +2184,10 @@ void _mm_sfence(void); /// 2: Bits [47:32] are copied to the destination. \n /// 3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. -#define _mm_extract_pi16(a, n) __extension__ ({ \ - (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n); }) +#define _mm_extract_pi16(a, n) \ + (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n) -/// \brief Copies data from the 64-bit vector of [4 x i16] to the destination, +/// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16 bits of an integer operand at the 16-bit offset /// specified by the immediate operand \a n. /// @@ -2217,10 +2215,10 @@ void _mm_sfence(void); /// bits in operand \a a. /// \returns A 64-bit integer vector containing the copied packed data from the /// operands. -#define _mm_insert_pi16(a, d, n) __extension__ ({ \ - (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n); }) +#define _mm_insert_pi16(a, d, n) \ + (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n) -/// \brief Compares each of the corresponding packed 16-bit integer values of +/// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the greater value to the /// corresponding bits in the destination. /// @@ -2233,13 +2231,13 @@ void _mm_sfence(void); /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); } -/// \brief Compares each of the corresponding packed 8-bit unsigned integer +/// Compares each of the corresponding packed 8-bit unsigned integer /// values of the 64-bit integer vectors, and writes the greater value to the /// corresponding bits in the destination. /// @@ -2252,13 +2250,13 @@ _mm_max_pi16(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); } -/// \brief Compares each of the corresponding packed 16-bit integer values of +/// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the lesser value to the /// corresponding bits in the destination. /// @@ -2271,13 +2269,13 @@ _mm_max_pu8(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); }
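
The `_mm_extract_pi16`/`_mm_insert_pi16` rewrites above (like the later `_mm_shuffle_pi16`, `_mm_shuffle_ps`, and XOP macro changes) replace the GNU statement-expression form `__extension__ ({ ... })` with a plain cast expression, which is also valid where statement expressions are not allowed; callers are unaffected. A small usage sketch (hypothetical, not from the patch; MMX code should execute `_mm_empty()` before later x87 floating-point code):

```c
#include <xmmintrin.h>

int extract_insert_demo(void) {
  __m64 v = _mm_set_pi16(4, 3, 2, 1); /* lanes 0..3 hold 1, 2, 3, 4 */
  int w = _mm_extract_pi16(v, 2);     /* bits [47:32] -> 3 */
  v = _mm_insert_pi16(v, 7, 0);       /* lane 0 becomes 7 */
  int x = _mm_extract_pi16(v, 0);     /* -> 7 */
  _mm_empty();                        /* leave MMX state */
  return w + x;                       /* 10 */
}
```
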
-/// \brief Compares each of the corresponding packed 8-bit unsigned integer +/// Compares each of the corresponding packed 8-bit unsigned integer /// values of the 64-bit integer vectors, and writes the lesser value to the /// corresponding bits in the destination. /// @@ -2290,13 +2288,13 @@ _mm_min_pi16(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); } -/// \brief Takes the most significant bit from each 8-bit element in a 64-bit +/// Takes the most significant bit from each 8-bit element in a 64-bit /// integer vector to create an 8-bit mask value. Zero-extends the value to a /// 32-bit integer and writes it to the destination. /// @@ -2308,13 +2306,13 @@ _mm_min_pu8(__m64 __a, __m64 __b) /// A 64-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS_MMX _mm_movemask_pi8(__m64 __a) { return __builtin_ia32_pmovmskb((__v8qi)__a); } -/// \brief Multiplies packed 16-bit unsigned integer values and writes the +/// Multiplies packed 16-bit unsigned integer values and writes the /// high-order 16 bits of each 32-bit product to the corresponding bits in /// the destination. /// @@ -2327,13 +2325,13 @@ _mm_movemask_pi8(__m64 __a) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the products of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhi_pu16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); } -/// \brief Shuffles the 4 16-bit integers from a 64-bit integer vector to the +/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the /// destination, as specified by the immediate value operand. /// /// \headerfile <x86intrin.h> @@ -2364,10 +2362,10 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b) /// 10: assigned from bits [47:32] of \a a. \n /// 11: assigned from bits [63:48] of \a a. /// \returns A 64-bit integer vector containing the shuffled values. -#define _mm_shuffle_pi16(a, n) __extension__ ({ \ - (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)); }) +#define _mm_shuffle_pi16(a, n) \ + (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)) -/// \brief Conditionally copies the values from each 8-bit element in the first +/// Conditionally copies the values from each 8-bit element in the first /// 64-bit integer vector operand to the specified memory location, as /// specified by the most significant bit in the corresponding element in the /// second 64-bit integer vector operand. @@ -2390,13 +2388,13 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b) /// A pointer to a 64-bit memory location that will receive the conditionally /// copied integer values. The address of the memory location does not have /// to be aligned. -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) { __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); }
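
`_mm_shuffle_pi16`'s immediate uses two bits per destination lane, so masks can be written with the `_MM_SHUFFLE` helper. For example, reversing the four 16-bit lanes (illustrative sketch):

```c
#include <xmmintrin.h>

__m64 reverse_lanes_pi16(__m64 v) {
  /* _MM_SHUFFLE(0, 1, 2, 3) == 0x1B: result lane 0 takes source lane 3,
     lane 1 takes lane 2, and so on, reversing the vector. */
  return _mm_shuffle_pi16(v, _MM_SHUFFLE(0, 1, 2, 3));
}
```
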
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); } -/// \brief Computes the rounded averages of the packed unsigned 16-bit integer +/// Computes the rounded averages of the packed unsigned 16-bit integer /// values and writes the averages to the corresponding bits in the /// destination. /// @@ -2428,13 +2426,13 @@ _mm_avg_pu8(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); } -/// \brief Subtracts the corresponding 8-bit unsigned integer values of the two +/// Subtracts the corresponding 8-bit unsigned integer values of the two /// 64-bit vector operands and computes the absolute value for each of the /// differences. Then the sum of the 8 absolute differences is written to /// bits [15:0] of the destination; the remaining bits [63:16] are cleared. @@ -2450,7 +2448,7 @@ _mm_avg_pu16(__m64 __a, __m64 __b) /// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the /// sets of absolute differences between both operands. The upper bits are /// cleared. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sad_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); @@ -2460,7 +2458,7 @@ _mm_sad_pu8(__m64 __a, __m64 __b) extern "C" { #endif -/// \brief Returns the contents of the MXCSR register as a 32-bit unsigned +/// Returns the contents of the MXCSR register as a 32-bit unsigned /// integer value. /// /// There are several groups of macros associated with this @@ -2495,10 +2493,14 @@ extern "C" { /// /// For example, the following expression checks if an overflow exception has /// occurred: +/// \code /// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW ) +/// \endcode /// /// The following expression gets the current rounding mode: +/// \code /// _MM_GET_ROUNDING_MODE() +/// \endcode /// /// \headerfile <x86intrin.h> /// @@ -2508,7 +2510,7 @@ extern "C" { /// register. unsigned int _mm_getcsr(void); -/// \brief Sets the MXCSR register with the 32-bit unsigned integer value. +/// Sets the MXCSR register with the 32-bit unsigned integer value. /// /// There are several groups of macros associated with this intrinsic, /// including: @@ -2566,7 +2568,7 @@ void _mm_setcsr(unsigned int __i); } // extern "C" #endif
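
In use, the MXCSR expressions added in the `\code` blocks above combine with the companion macros like this (a sketch; `_MM_SET_ROUNDING_MODE` writes the same register that `_mm_getcsr` reads):

```c
#include <stdio.h>
#include <xmmintrin.h>

void mxcsr_demo(void) {
  /* Has an overflow exception been flagged? */
  if (_mm_getcsr() & _MM_EXCEPT_OVERFLOW)
    puts("overflow occurred");

  /* Save, change, and restore the rounding mode. */
  unsigned int old_mode = _MM_GET_ROUNDING_MODE();
  _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
  /* ... truncating conversions here ... */
  _MM_SET_ROUNDING_MODE(old_mode);
}
```
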
-/// \brief Selects 4 float values from the 128-bit operands of [4 x float], as +/// Selects 4 float values from the 128-bit operands of [4 x float], as /// specified by the immediate value operand. /// /// \headerfile <x86intrin.h> /// @@ -2602,14 +2604,11 @@ void _mm_setcsr(unsigned int __i); /// 10: Bits [95:64] copied from the specified operand. \n /// 11: Bits [127:96] copied from the specified operand. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. -#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ - (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ - 0 + (((mask) >> 0) & 0x3), \ - 0 + (((mask) >> 2) & 0x3), \ - 4 + (((mask) >> 4) & 0x3), \ - 4 + (((mask) >> 6) & 0x3)); }) - -/// \brief Unpacks the high-order (index 2,3) values from two 128-bit vectors of +#define _mm_shuffle_ps(a, b, mask) \ + (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ + (int)(mask)) + +/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. /// /// \headerfile <x86intrin.h> @@ -2631,7 +2630,7 @@ _mm_unpackhi_ps(__m128 __a, __m128 __b) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7); } -/// \brief Unpacks the low-order (index 0,1) values from two 128-bit vectors of +/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. /// /// \headerfile <x86intrin.h> @@ -2653,7 +2652,7 @@ _mm_unpacklo_ps(__m128 __a, __m128 __b) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5); } -/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower +/// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits are set to the lower 32 bits of the second parameter. The upper /// 96 bits are set to the upper 96 bits of the first parameter. /// @@ -2672,10 +2671,10 @@ _mm_unpacklo_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b) { - return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 4, 1, 2, 3); + __a[0] = __b[0]; + return __a; } -/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower +/// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 64 bits are set to the upper 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. /// @@ -2696,7 +2696,7 @@ _mm_movehl_ps(__m128 __a, __m128 __b) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3); } -/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower +/// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 64 bits are set to the lower 64 bits of the first parameter. The upper /// 64 bits are set to the lower 64 bits of the second parameter. /// @@ -2717,7 +2717,7 @@ _mm_movelh_ps(__m128 __a, __m128 __b) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5); } -/// \brief Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x +/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x /// float]. /// /// \headerfile <x86intrin.h> @@ -2729,7 +2729,7 @@ _mm_movelh_ps(__m128 __a, __m128 __b) /// from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi16_ps(__m64 __a) { __m64 __b, __c; @@ -2747,7 +2747,7 @@ _mm_cvtpi16_ps(__m64 __a) return __r; }
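
The new `__builtin_ia32_shufps` form above forwards the immediate directly to the `shufps` instruction; callers still build the mask the same way, typically with `_MM_SHUFFLE`. Two common uses as a sketch:

```c
#include <xmmintrin.h>

__m128 broadcast_lane0(__m128 a) {
  /* Every result lane selects lane 0. */
  return _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0));
}

__m128 swap_halves(__m128 a) {
  /* Result lanes 0,1 take old lanes 2,3 and vice versa. */
  return _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2));
}
```
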
-/// \brief Converts a 64-bit vector of 16-bit unsigned integer values into a +/// Converts a 64-bit vector of 16-bit unsigned integer values into a /// 128-bit vector of [4 x float]. /// /// \headerfile <x86intrin.h> @@ -2759,7 +2759,7 @@ _mm_cvtpi16_ps(__m64 __a) /// destination are copied from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu16_ps(__m64 __a) { __m64 __b, __c; @@ -2776,7 +2776,7 @@ _mm_cvtpu16_ps(__m64 __a) return __r; } -/// \brief Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] +/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] /// into a 128-bit vector of [4 x float]. /// /// \headerfile <x86intrin.h> @@ -2788,7 +2788,7 @@ _mm_cvtpu16_ps(__m64 __a) /// from the corresponding lower 4 elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi8_ps(__m64 __a) { __m64 __b; @@ -2800,7 +2800,7 @@ _mm_cvtpi8_ps(__m64 __a) return _mm_cvtpi16_ps(__b); } -/// \brief Converts the lower four unsigned 8-bit integer values from a 64-bit +/// Converts the lower four unsigned 8-bit integer values from a 64-bit /// vector of [8 x u8] into a 128-bit vector of [4 x float]. /// /// \headerfile <x86intrin.h> @@ -2813,7 +2813,7 @@ _mm_cvtpi8_ps(__m64 __a) /// operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu8_ps(__m64 __a) { __m64 __b; @@ -2824,7 +2824,7 @@ _mm_cvtpu8_ps(__m64 __a) return _mm_cvtpi16_ps(__b); } -/// \brief Converts the two 32-bit signed integer values from each 64-bit vector +/// Converts the two 32-bit signed integer values from each 64-bit vector /// operand of [2 x i32] into a 128-bit vector of [4 x float]. /// /// \headerfile <x86intrin.h> @@ -2840,7 +2840,7 @@ _mm_cvtpu8_ps(__m64 __a) /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// copied and converted values from the first operand. The upper 64 bits /// contain the copied and converted values from the second operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) { __m128 __c; @@ -2852,7 +2852,7 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) return _mm_cvtpi32_ps(__c, __a); } -/// \brief Converts each single-precision floating-point element of a 128-bit +/// Converts each single-precision floating-point element of a 128-bit /// floating-point vector of [4 x float] into a 16-bit signed integer, and /// packs the results into a 64-bit integer vector of [4 x i16]. /// @@ -2869,7 +2869,7 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) /// A 128-bit floating-point vector of [4 x float]. /// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi16(__m128 __a) { __m64 __b, __c; @@ -2881,7 +2881,7 @@ _mm_cvtps_pi16(__m128 __a) return _mm_packs_pi32(__b, __c); } -/// \brief Converts each single-precision floating-point element of a 128-bit +/// Converts each single-precision floating-point element of a 128-bit /// floating-point vector of [4 x float] into an 8-bit signed integer, and /// packs the results into the lower 32 bits of a 64-bit integer vector of /// [8 x i8]. The upper 32 bits of the vector are set to 0. @@ -2899,7 +2899,7 @@ _mm_cvtps_pi16(__m128 __a) /// 128-bit floating-point vector of [4 x float]. /// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the /// converted values and the upper 32 bits are set to zero. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi8(__m128 __a) { __m64 __b, __c; @@ -2910,7 +2910,7 @@ _mm_cvtps_pi8(__m128 __a) return _mm_packs_pi16(__b, __c); } -/// \brief Extracts the sign bits from each single-precision floating-point +/// Extracts the sign bits from each single-precision floating-point /// element of a 128-bit floating-point vector of [4 x float] and returns the /// sign bits in bits [0:3] of the result. Bits [31:4] of the result are set /// to zero. @@ -3002,6 +3002,7 @@ do { \ #define _m_ _mm_ #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_MMX /* Ugly hack for backwards-compatibility (compatible with gcc) */ #if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)
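
The `__DEFAULT_FN_ATTRS_MMX` variants above mark every intrinsic that touches MMX state. A round-trip sketch using the convert-and-pack forms (illustrative only; rounding follows the current MXCSR mode, and callers should run `_mm_empty()` before later x87 code):

```c
#include <xmmintrin.h>

/* Convert four 16-bit integers to float, scale them, convert back. */
__m64 scale_pi16(__m64 v, float s) {
  __m128 f = _mm_cvtpi16_ps(v);      /* [4 x i16] -> [4 x float] */
  f = _mm_mul_ps(f, _mm_set1_ps(s));
  return _mm_cvtps_pi16(f);          /* [4 x float] -> [4 x i16] */
}
```
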
diff --git a/lib/clang/7.0.0/include/xopintrin.h b/lib/clang/8.0.0/include/xopintrin.h similarity index 91% rename from lib/clang/7.0.0/include/xopintrin.h rename to lib/clang/8.0.0/include/xopintrin.h index 4a34f77..9d540a2 100644 --- a/lib/clang/7.0.0/include/xopintrin.h +++ b/lib/clang/8.0.0/include/xopintrin.h @@ -31,7 +31,8 @@ #include <fma4intrin.h> /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) @@ -201,7 +202,7 @@ _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); @@ -237,17 +238,17 @@ _mm_rot_epi64(__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); } -#define _mm_roti_epi8(A, N) __extension__ ({ \ - (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); }) +#define _mm_roti_epi8(A, N) \ + (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)) -#define _mm_roti_epi16(A, N) __extension__ ({ \ - (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); }) +#define _mm_roti_epi16(A, N) \ + (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)) -#define _mm_roti_epi32(A, N) __extension__ ({ \ - (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); }) +#define _mm_roti_epi32(A, N) \ + (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)) -#define _mm_roti_epi64(A, N) __extension__ ({ \ - (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); }) +#define _mm_roti_epi64(A, N) \ + (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi8(__m128i __A, __m128i __B) @@ -297,37 +298,37 @@ _mm_sha_epi64(__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); } -#define _mm_com_epu8(A, B, N) __extension__ ({ \ +#define _mm_com_epu8(A, B, N) \ (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (N)); }) + (__v16qi)(__m128i)(B), (N)) -#define _mm_com_epu16(A, B, N) __extension__ ({ \ +#define _mm_com_epu16(A, B, N) \ (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), (N)); }) + (__v8hi)(__m128i)(B), (N)) -#define _mm_com_epu32(A, B, N) __extension__ ({ \ +#define _mm_com_epu32(A, B, N) \ (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (N)); }) + (__v4si)(__m128i)(B), (N)) -#define _mm_com_epu64(A, B, N) __extension__ ({ \ +#define _mm_com_epu64(A, B, N) \ (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (N)); }) + (__v2di)(__m128i)(B), (N)) -#define _mm_com_epi8(A, B, N) __extension__ ({ \ +#define _mm_com_epi8(A, B, N) \ (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (N)); }) + (__v16qi)(__m128i)(B), (N)) -#define _mm_com_epi16(A, B, N) __extension__ ({ \ +#define _mm_com_epi16(A, B, N) \ (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), (N)); }) + (__v8hi)(__m128i)(B), (N)) -#define _mm_com_epi32(A, B, N) __extension__ ({ \ +#define _mm_com_epi32(A, B, N) \ (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (N)); }) + (__v4si)(__m128i)(B), (N)) -#define _mm_com_epi64(A, B, N) __extension__ ({ \ +#define _mm_com_epi64(A, B, N) \ (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (N)); }) + (__v2di)(__m128i)(B), (N)) #define _MM_PCOMCTRL_LT 0 #define _MM_PCOMCTRL_LE 1 @@ -722,24 +723,24 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B) return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); } -#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ +#define _mm_permute2_pd(X, Y, C, I) \ (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ - (__v2di)(__m128i)(C), (I)); }) + (__v2di)(__m128i)(C), (I)) -#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ +#define _mm256_permute2_pd(X, Y, C, I) \ (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \ (__v4df)(__m256d)(Y), \ - (__v4di)(__m256i)(C), (I)); }) + (__v4di)(__m256i)(C), (I)) -#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ +#define _mm_permute2_ps(X, Y, C, I) \ (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ - (__v4si)(__m128i)(C), (I)); }) + (__v4si)(__m128i)(C), (I)) -#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ +#define _mm256_permute2_ps(X, Y, C, I) \ (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), \ - (__v8si)(__m256i)(C), (I)); }) + (__v8si)(__m256i)(C), (I)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ss(__m128 __A) @@ -765,18 +766,19 @@ _mm_frcz_pd(__m128d __A) return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_frcz_ps(__m256 __A) { return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_frcz_pd(__m256d __A) { return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS256 #endif /* __XOPINTRIN_H */ diff --git a/lib/clang/7.0.0/include/xsavecintrin.h b/lib/clang/8.0.0/include/xsavecintrin.h similarity index 98% rename from lib/clang/7.0.0/include/xsavecintrin.h rename to lib/clang/8.0.0/include/xsavecintrin.h index 598470a..25577a9 100644 --- a/lib/clang/7.0.0/include/xsavecintrin.h +++ b/lib/clang/8.0.0/include/xsavecintrin.h @@ -1,4 +1,4 @@ -/*===---- xsavecintrin.h - XSAVEC intrinsic ------------------------------------=== +/*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/lib/clang/7.0.0/include/xsaveintrin.h b/lib/clang/8.0.0/include/xsaveintrin.h similarity index 91% rename from lib/clang/7.0.0/include/xsaveintrin.h rename to lib/clang/8.0.0/include/xsaveintrin.h index a2e6b2e..16f3a78 100644 --- a/lib/clang/7.0.0/include/xsaveintrin.h +++ b/lib/clang/8.0.0/include/xsaveintrin.h @@ -1,4 +1,4 @@ -/*===---- xsaveintrin.h - XSAVE intrinsic ------------------------------------=== +/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -33,23 +33,23 @@ static __inline__ void __DEFAULT_FN_ATTRS _xsave(void *__p, unsigned long long __m) { - return __builtin_ia32_xsave(__p, __m); + __builtin_ia32_xsave(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xrstor(void *__p, unsigned long long __m) { - return __builtin_ia32_xrstor(__p, __m); + __builtin_ia32_xrstor(__p, __m); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xsave64(void *__p, unsigned long long __m) { - return __builtin_ia32_xsave64(__p, __m); + __builtin_ia32_xsave64(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xrstor64(void *__p, unsigned long long __m) { - return __builtin_ia32_xrstor64(__p, __m); + __builtin_ia32_xrstor64(__p, __m); } #endif diff --git a/lib/clang/7.0.0/include/xsaveoptintrin.h b/lib/clang/8.0.0/include/xsaveoptintrin.h similarity index 94% rename from lib/clang/7.0.0/include/xsaveoptintrin.h rename to lib/clang/8.0.0/include/xsaveoptintrin.h index d3faae7..792cf92 100644 --- a/lib/clang/7.0.0/include/xsaveoptintrin.h +++ b/lib/clang/8.0.0/include/xsaveoptintrin.h @@ -1,4 +1,4 @@ -/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ------------------------------------=== +/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -33,13 +33,13 @@ static __inline__ void __DEFAULT_FN_ATTRS _xsaveopt(void *__p, unsigned long long __m) { - return __builtin_ia32_xsaveopt(__p, __m); + __builtin_ia32_xsaveopt(__p, __m); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xsaveopt64(void *__p, unsigned long long __m) { - return __builtin_ia32_xsaveopt64(__p, __m); + __builtin_ia32_xsaveopt64(__p, __m); } #endif diff --git a/lib/clang/7.0.0/include/xsavesintrin.h b/lib/clang/8.0.0/include/xsavesintrin.h similarity index 99% rename from lib/clang/7.0.0/include/xsavesintrin.h rename to lib/clang/8.0.0/include/xsavesintrin.h index c5e540a..fe2bc4b 100644 --- a/lib/clang/7.0.0/include/xsavesintrin.h +++ b/lib/clang/8.0.0/include/xsavesintrin.h @@ -1,4 +1,4 @@ -/*===---- xsavesintrin.h - XSAVES intrinsic ------------------------------------=== +/*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/lib/clang/7.0.0/include/xtestintrin.h b/lib/clang/8.0.0/include/xtestintrin.h similarity index 99% rename from lib/clang/7.0.0/include/xtestintrin.h rename to
lib/clang/8.0.0/include/xtestintrin.h index 9d3378f..9244243 100644 --- a/lib/clang/7.0.0/include/xtestintrin.h +++ b/lib/clang/8.0.0/include/xtestintrin.h @@ -1,4 +1,4 @@ -/*===---- xtestintrin.h - XTEST intrinsic ---------------------------------=== +/*===---- xtestintrin.h - XTEST intrinsic ----------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_ios_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_ios_dynamic.dylib new file mode 100755 index 0000000..1486b6f Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_ios_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib new file mode 100755 index 0000000..0f863a8 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib new file mode 100755 index 0000000..18a74b0 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.cc_kext.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.cc_kext.a new file mode 100644 index 0000000..2fb462d Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.cc_kext.a differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.cc_kext_ios.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.cc_kext_ios.a similarity index 58% rename from lib/clang/7.0.0/lib/darwin/libclang_rt.cc_kext_ios.a rename to lib/clang/8.0.0/lib/darwin/libclang_rt.cc_kext_ios.a index 10a861c..40a2ca1 100644 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.cc_kext_ios.a and b/lib/clang/8.0.0/lib/darwin/libclang_rt.cc_kext_ios.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_ios.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_ios.a new file mode 100644 index 0000000..54e20c7 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_ios.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_iossim.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_iossim.a new file mode 100644 index 0000000..bf5ea11 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_iossim.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_ios.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_ios.a new file mode 100644 index 0000000..267abcc Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_ios.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_iossim.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_iossim.a new file mode 100644 index 0000000..218b916 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_iossim.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_osx.a new file mode 100644 index 0000000..d3e97e9 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_no_main_osx.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_osx.a new file mode 100644 index 0000000..883707b Binary files /dev/null 
and b/lib/clang/8.0.0/lib/darwin/libclang_rt.fuzzer_osx.a differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ios.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.ios.a similarity index 58% rename from lib/clang/7.0.0/lib/darwin/libclang_rt.ios.a rename to lib/clang/8.0.0/lib/darwin/libclang_rt.ios.a index d972a40..c8b0979 100644 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ios.a and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ios.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_ios_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_ios_dynamic.dylib new file mode 100755 index 0000000..94fc1fe Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_ios_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_iossim_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_iossim_dynamic.dylib new file mode 100755 index 0000000..29b7a10 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_iossim_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_osx_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_osx_dynamic.dylib new file mode 100755 index 0000000..6542af8 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.lsan_osx_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.osx.a new file mode 100644 index 0000000..78725f3 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.osx.a differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.profile_ios.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.profile_ios.a similarity index 50% rename from lib/clang/7.0.0/lib/darwin/libclang_rt.profile_ios.a rename to lib/clang/8.0.0/lib/darwin/libclang_rt.profile_ios.a index 94817a4..2d77994 100644 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.profile_ios.a and b/lib/clang/8.0.0/lib/darwin/libclang_rt.profile_ios.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.profile_iossim.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.profile_iossim.a new file mode 100644 index 0000000..323f621 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.profile_iossim.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.profile_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.profile_osx.a new file mode 100644 index 0000000..c7870b9 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.profile_osx.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_ios_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_ios_dynamic.dylib new file mode 100755 index 0000000..e529242 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_ios_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_iossim_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_iossim_dynamic.dylib new file mode 100755 index 0000000..742d67c Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_iossim_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_osx_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_osx_dynamic.dylib new file mode 100755 index 0000000..bc1bc53 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.tsan_osx_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_ios.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_ios.a new file mode 100644 index 0000000..6af6671 Binary files /dev/null and 
b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_ios.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_ios_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_ios_dynamic.dylib new file mode 100755 index 0000000..4ec3cc1 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_ios_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_iossim.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_iossim.a new file mode 100644 index 0000000..8424f17 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_iossim.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib new file mode 100755 index 0000000..1fd37d3 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios.a new file mode 100644 index 0000000..8657786 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios.a differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios_dynamic.dylib similarity index 82% rename from lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios_dynamic.dylib rename to lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios_dynamic.dylib index e6a82c3..8191715 100755 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios_dynamic.dylib and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_ios_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim.a new file mode 100644 index 0000000..893a205 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim.a differ diff --git a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim_dynamic.dylib similarity index 53% rename from lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim_dynamic.dylib rename to lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim_dynamic.dylib index afcd49b..5eaebdc 100755 Binary files a/lib/clang/7.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim_dynamic.dylib and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_iossim_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx.a new file mode 100644 index 0000000..b40648e Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx_dynamic.dylib new file mode 100755 index 0000000..f15b5df Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_minimal_osx_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_osx.a new file mode 100644 index 0000000..503ab67 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_osx.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib new file mode 100755 index 
0000000..c40bbda Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-basic_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-basic_osx.a new file mode 100644 index 0000000..fecdcf4 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-basic_osx.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-fdr_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-fdr_osx.a new file mode 100644 index 0000000..aac75a5 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-fdr_osx.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-profiling_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-profiling_osx.a new file mode 100644 index 0000000..fcbc704 Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.xray-profiling_osx.a differ diff --git a/lib/clang/8.0.0/lib/darwin/libclang_rt.xray_osx.a b/lib/clang/8.0.0/lib/darwin/libclang_rt.xray_osx.a new file mode 100644 index 0000000..050790b Binary files /dev/null and b/lib/clang/8.0.0/lib/darwin/libclang_rt.xray_osx.a differ diff --git a/lib/libBlinkGCPlugin.dylib b/lib/libBlinkGCPlugin.dylib index ba91a30..a4c28c6 100755 Binary files a/lib/libBlinkGCPlugin.dylib and b/lib/libBlinkGCPlugin.dylib differ diff --git a/lib/libFindBadConstructs.dylib b/lib/libFindBadConstructs.dylib index c6f1380..a7ee4b2 100755 Binary files a/lib/libFindBadConstructs.dylib and b/lib/libFindBadConstructs.dylib differ
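
Among the runtime archives added above, the `libclang_rt.fuzzer_*` libraries are new, so this toolchain can link libFuzzer targets directly. A minimal smoke-test target (hypothetical file name; building with `clang -fsanitize=fuzzer,address fuzz_me.c` pulls in the fuzzer and ASan runtimes listed above):

```c
#include <stddef.h>
#include <stdint.h>

/* libFuzzer calls this entry point repeatedly with generated inputs. */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  if (size >= 3 && data[0] == 'F' && data[1] == 'U' && data[2] == 'Z')
    __builtin_trap(); /* deliberate crash so the fuzzer reports it */
  return 0;
}
```
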